00001 00002 00003 00004 00005 00006 00007 00008 00009 00010 00011 00012 00013 00014 00015 00016 00017 00018 00019
00020
00021 #include "_gocr.h"
00022 #include "hash.h"
00023 #include "unicode.h"
00024 #include <stdarg.h>
00025
00026 static HashTable blockAttrib, charAttrib;
00027
00028 struct charattribute {
00029
00030 unsigned char *format;
00031 gocrCharAttributeType type;
00032 int index;
00033 union {
00034 int settable;
00035 unsigned char *value;
00036 } data;
00037 };
00038
00039 int _gocr_initUnicode ( void ) {
00040 struct initial {
00041 char *name;
00042 gocrCharAttributeType type;
00043 char *format;
00044 };
00045
00046 const struct initial chardata[] = { { "BOLD", SETTABLE, NULL },
00047 { "ITALIC", SETTABLE, NULL },
00048 { "FONT", UNTIL_OVERRIDEN, "%s %d" } };
00049 int i;
00050
00051 if ( hash_init(&blockAttrib, 0xFF, NULL) == -1 ) {
00052 _gocr_debug(1, fprintf(_data.error, "_gocr_initUnicode: hash_init(block)");)
00053 return -1;
00054 }
00055 if ( hash_init(&charAttrib, 0xFF, NULL) == -1 ) {
00056 _gocr_debug(1, fprintf(_data.error, "_gocr_initUnicode: hash_init(char)");)
00057 return -1;
00058 }
00059
00060 // for ( i = 0; i < sizeof(blockdata)/sizeof(char *); i++ )
00061
00062 for ( i = 0; i < sizeof(chardata)/sizeof(char *); i++ )
00063 gocr_charAttributeRegister(chardata[i].name, chardata[i].type,
00064 chardata[i].format);
00065
00066 return 0;
00067 }
00068
00069 void _free_ca ( void *data ) {
00070 struct charattribute *ca = (struct charattribute *)data;
00071 if ( ca == NULL )
00072 return;
00073 if ( ca->format )
00074 free(ca->format);
00075 free(ca);
00076 }
00077
00078 void _gocr_endUnicode ( void ) {
00079 hash_free(&blockAttrib, NULL);
00080 hash_free(&charAttrib, _free_ca);
00081 }
00082
00083 int gocr_charAttributeRegister ( char *name, gocrCharAttributeType t,
00084 char *format ) {
00085 struct charattribute *ca;
00086
00087 _gocr_debug(3, fprintf(_data.error, "gocr_charAttributeCreate(%s, %d, %s)\n",
00088 name, t, format);)
00089
00090 if ( !name ) {
00091 _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeCreate: NULL name\n");)
00092 return -1;
00093 }
00094
00095 /* fill structure */
00096 ca = (struct charattribute *)malloc(sizeof(struct charattribute));
00097 if ( ca == NULL ) {
00098 _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeCreate: NULL malloc\n");)
00099 return -1;
00100 }
00101
00102 switch ( t ) {
00103 case SETTABLE:
00104 ca->data.settable = 0;
00105 break;
00106 case UNTIL_OVERRIDEN:
00107 ca->data.value = NULL;
00108 break;
00109 default:
00110 _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: unexistant type\n");)
00111 free(ca);
00112 return -1;
00113 }
00114 ca->type = t;
00115
00116 /* future: check format to see if it's a valid one */
00117 ca->format = ( format == NULL ? NULL : (unsigned char *)strdup(format) );
00118
00119 ca->index = hash_insert(&charAttrib, name, (void *)ca);
00120 if ( ca->index < 0 ) {
00121 _gocr_debug(1, fprintf(_data.error, "Hash error %d\n", ca->index);)
00122 if ( ca->format )
00123 free(ca->format);
00124 free(ca);
00125 }
00126
00127 return 0;
00128 }
00129
00130 int gocr_boxAttributeSet ( gocrBox *box, int action, char *name, ... ) {
00131 wchar_t *t;
00132 int length;
00133 struct charattribute *ca;
00134
00135 _gocr_debug(3, fprintf(_data.error, "gocr_charAttributeInsert(%p, %d, %s,...)",
00136 box, action, name);)
00137 if ( name == NULL ) {
00138 _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: NULL name\n");)
00139 return -1;
00140 }
00141
00142 ca = (struct charattribute *)hash_data(&charAttrib, name);
00143 if ( ca == NULL ) {
00144 _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: attribute not found\n");)
00145 return -1;
00146 }
00147
00148 if ( action == 1 ) { /* insert */
00149 unsigned char *buffer = NULL, *p;
00150
00151 /* check if it already exists */
00152 for ( t = box->attributes; *t != '\0'; t++ ) {
00153 if ( *t = gocr_setCharAttribute(ca->index) ) {
00154 _gocr_debug(2, fprintf(_data.error, "gocr_charAttributeInsert: attribute exists\n");)
00155 return -1;
00156 }
00157 }
00158
00159 /* create format string */
00160 if ( ca->format != NULL ) {
00161 int size = 100;
00162 va_list va;
00163
00164 va_start(va, name);
00165
00166 /* fill the buffer */
00167 buffer = (unsigned char *)malloc(size);
00168 if ( buffer == NULL ) {
00169 _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: NULL malloc\n");)
00170 return -1;
00171 }
00172
00173 /* sprintf, making sure it fits */
00174 while (1) {
00175 int nchars = vsnprintf (buffer, size, ca->format, va);
00176
00177 if (nchars > -1)
00178 break;
00179
00180 size *= 2;
00181 buffer = (unsigned char *)realloc(buffer, size);
00182 if ( buffer == NULL ) {
00183 _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: NULL realloc %d\n", size);)
00184 return -1;
00185 }
00186 }
00187 va_end(va);
00188
00189 }
00190 /* point t to the end of the the string, realloc to fit */
00191 length = (box->attributes == NULL ? 0 : wcslen(box->attributes));
00192 box->attributes = (wchar_t *)realloc(box->attributes, (length +
00193 (buffer == NULL ? 0 : strlen(buffer)) + 2)*sizeof(wchar_t));
00194 if ( box->attributes == NULL ) {
00195 _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: NULL wrealloc\n");)
00196 return -1;
00197 }
00198 t = box->attributes + wcslen(box->attributes);
00199
00200 /* and fill with the data */
00201 *t++ = gocr_setCharAttribute(ca->index);
00202 for ( p = buffer; *p != '\0'; p++ )
00203 *t++ = gocr_setCharAttributeData(*p);
00204 *t = '\0';
00205
00206 switch ( ca->type ) {
00207 case SETTABLE: /* it's a settable attribute, so set/unset it */
00208 ca->data.settable = !ca->data.settable;
00209 break;
00210 case UNTIL_OVERRIDEN:
00211 if ( ca->data.value )
00212 free(ca->data.value);
00213 ca->data.value = (unsigned char*)strdup(buffer);
00214 break;
00215 default:
00216 _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: unexistant type\n");)
00217 }
00218
00219 if ( buffer )
00220 free(buffer);
00221 }
00222 else if ( action == 0 ) { /* delete */
00223 wchar_t c = gocr_setCharAttribute(ca->index), *s;
00224
00225 if ( !box->attributes )
00226 return 0;
00227
00228 /* find the attribute */
00229 for ( t = box->attributes; *t != '\0'; t++ )
00230 if ( *t == c )
00231 break;
00232 if ( *t == '\0' ) /* not found */
00233 return 0;
00234
00235 /* find the end of the attribute */
00236 for ( s = t+1; gocr_ischarAttributeData(*s); s++ )
00237 ;
00238
00239 /* move the rest of the string, and realloc it */
00240 memmove(t, s, (wcslen(s)+1)*sizeof(wchar_t));
00241 box->attributes = (wchar_t *)realloc(box->attributes,
00242 (wcslen(box->attributes)+1)*sizeof(wchar_t));
00243
00244 switch ( ca->type ) {
00245 case SETTABLE: /* it's a settable attribute, so set/unset it */
00246 ca->data.settable = !ca->data.settable;
00247 break;
00248 case UNTIL_OVERRIDEN:
00249 /*TODO: must search the previous value, etc */
00250 _gocr_debug(0, fprintf(_data.error, "gocr_charAttributeInsert: UNTIL_OVERRIDEN not done yet;\n"
00251 "Unpredictable behaviour may occur.\n");)
00252 break;
00253 default:
00254 _gocr_debug(1, fprintf(_data.error, "gocr_charAttributeInsert: unexistant type\n");)
00255 }
00256 }
00257
00258 return 0;
00259 }
00260
00261 /* Arguments: the character (main), and the modifier (accent, etc). See the
00262 function if you want to know the modifiers.
00263 Description: This function intends to be a small helper, to avoid having
00264 to write switches in functions. It's therefore mainly to accents, and
00265 specially for the most usual ones. It supports the basic greek
00266 characters too, which is actually not very helpful.
00267 Returns: the unicode character corresponding to the composed character. */
00268 wchar_t gocr_compose ( wchar_t main, wchar_t modifier ) {
00269
00270 /* supported by now: part of ISO8859-1, basic greek characters */
00271 _gocr_debug(3, fprintf(_data.error, "compose(%l, %l)\n", main, modifier);)
00272 switch (modifier) {
00273 case UNICODE_NULL:
00274 case SPACE:
00275 return (wchar_t)main;
00276
00277 case APOSTROPHE: /* do NOT USE this. It's here for compatibility only.
00278 Use ACUTE_ACCENT instead. */
00279 _gocr_debug(2, fprintf( _data.error, "COMPOSE: got APOSTROPHE instead of ACUTE_ACCENT");)
00280 case ACUTE_ACCENT: /* acute/cedilla */
00281 switch (main) {
00282 case 'a': return LATIN_SMALL_LETTER_A_WITH_ACUTE;
00283 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_ACUTE;
00284 case 'e': return LATIN_SMALL_LETTER_E_WITH_ACUTE;
00285 case 'E': return LATIN_CAPITAL_LETTER_E_WITH_ACUTE;
00286 case 'i': return LATIN_SMALL_LETTER_I_WITH_ACUTE;
00287 case 'I': return LATIN_CAPITAL_LETTER_I_WITH_ACUTE;
00288 case 'o': return LATIN_SMALL_LETTER_O_WITH_ACUTE;
00289 case 'O': return LATIN_CAPITAL_LETTER_O_WITH_ACUTE;
00290 case 'u': return LATIN_SMALL_LETTER_U_WITH_ACUTE;
00291 case 'U': return LATIN_CAPITAL_LETTER_U_WITH_ACUTE;
00292 case 'y': return LATIN_SMALL_LETTER_Y_WITH_ACUTE;
00293 case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_ACUTE;
00294 default: return(wchar_t)0;
00295 }
00296 break;
00297
00298 case CEDILLA:
00299 switch (main) {
00300 case 'c': return LATIN_SMALL_LETTER_C_WITH_CEDILLA;
00301 case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CEDILLA;
00302 }
00303 break;
00304
00305 case TILDE:
00306 switch (main) {
00307 case 'a': return LATIN_SMALL_LETTER_A_WITH_TILDE;
00308 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_TILDE;
00309 case 'n': return LATIN_SMALL_LETTER_N_WITH_TILDE;
00310 case 'N': return LATIN_CAPITAL_LETTER_N_WITH_TILDE;
00311 case 'o': return LATIN_SMALL_LETTER_O_WITH_TILDE;
00312 case 'O': return LATIN_CAPITAL_LETTER_O_WITH_TILDE;
00313 default: return(wchar_t)0;
00314 }
00315 break;
00316 case GRAVE_ACCENT:
00317 switch (main) {
00318 case 'a': return LATIN_SMALL_LETTER_A_WITH_GRAVE;
00319 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_GRAVE;
00320 case 'e': return LATIN_SMALL_LETTER_E_WITH_GRAVE;
00321 case 'E': return LATIN_CAPITAL_LETTER_E_WITH_GRAVE;
00322 case 'i': return LATIN_SMALL_LETTER_I_WITH_GRAVE;
00323 case 'I': return LATIN_CAPITAL_LETTER_I_WITH_GRAVE;
00324 case 'o': return LATIN_SMALL_LETTER_O_WITH_GRAVE;
00325 case 'O': return LATIN_CAPITAL_LETTER_O_WITH_GRAVE;
00326 case 'u': return LATIN_SMALL_LETTER_U_WITH_GRAVE;
00327 case 'U': return LATIN_CAPITAL_LETTER_U_WITH_GRAVE;
00328 default: return(wchar_t)0;
00329 }
00330 break;
00331 case QUOTATION_MARK: /* do NOT USE this. It's here for compatibility only.
00332 Use DIAERESIS instead. */
00333 _gocr_debug(2, fprintf( _data.error, "COMPOSE: QUOTATION_MARK instead of DIAERESIS");)
00334 case DIAERESIS:
00335 switch (main) {
00336 case 'a': return LATIN_SMALL_LETTER_A_WITH_DIAERESIS;
00337 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS;
00338 case 'e': return LATIN_SMALL_LETTER_E_WITH_DIAERESIS;
00339 case 'E': return LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS;
00340 case 'i': return LATIN_SMALL_LETTER_I_WITH_DIAERESIS;
00341 case 'I': return LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS;
00342 case 'o': return LATIN_SMALL_LETTER_O_WITH_DIAERESIS;
00343 case 'O': return LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS;
00344 case 'u': return LATIN_SMALL_LETTER_U_WITH_DIAERESIS;
00345 case 'U': return LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS;
00346 case 'y': return LATIN_SMALL_LETTER_Y_WITH_DIAERESIS;
00347 case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
00348 default: return(wchar_t)0;
00349 }
00350 break;
00351 case CIRCUMFLEX_ACCENT:
00352 switch (main) {
00353 case 'a': return LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX;
00354 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX;
00355 case 'e': return LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX;
00356 case 'E': return LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX;
00357 case 'i': return LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX;
00358 case 'I': return LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX;
00359 case 'o': return LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX;
00360 case 'O': return LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX;
00361 case 'u': return LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX;
00362 case 'U': return LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX;
00363 default: return(wchar_t)0;
00364 }
00365 break;
00366 case RING_ABOVE:
00367 switch (main) {
00368 case 'a': return LATIN_SMALL_LETTER_A_WITH_RING_ABOVE;
00369 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE;
00370 default: return(wchar_t)0;
00371 }
00372 break;
00373 case 'e': /* e ligatures: ae, oe. */
00374 case 'E':
00375 switch (main) {
00376 case 'a': return LATIN_SMALL_LETTER_AE;
00377 case 'A': return LATIN_CAPITAL_LETTER_AE;
00378 case 'o': return LATIN_SMALL_LIGATURE_OE;
00379 case 'O': return LATIN_CAPITAL_LIGATURE_OE;
00380 default: return(wchar_t)0;
00381 }
00382 break;
00383 case 'g': /* greek */
00384 switch (main) {
00385 /* missing 0x37A-0x390 */
00386 /* weird cases: Q -> theta (it resembles a little, doesn't it?)
00387 V -> psi (what can I do?) */
00388 case 'A': return GREEK_CAPITAL_LETTER_ALPHA;
00389 case 'B': return GREEK_CAPITAL_LETTER_BETA;
00390 case 'G': return GREEK_CAPITAL_LETTER_GAMMA;
00391 case 'D': return GREEK_CAPITAL_LETTER_DELTA;
00392 case 'E': return GREEK_CAPITAL_LETTER_EPSILON;
00393 case 'Z': return GREEK_CAPITAL_LETTER_ZETA;
00394 case 'H': return GREEK_CAPITAL_LETTER_ETA;
00395 case 'Q': return GREEK_CAPITAL_LETTER_THETA;
00396 case 'I': return GREEK_CAPITAL_LETTER_IOTA;
00397 case 'K': return GREEK_CAPITAL_LETTER_KAPPA;
00398 case 'L': return GREEK_CAPITAL_LETTER_LAMDA;
00399 case 'M': return GREEK_CAPITAL_LETTER_MU;
00400 case 'N': return GREEK_CAPITAL_LETTER_NU;
00401 case 'X': return GREEK_CAPITAL_LETTER_XI;
00402 case 'O': return GREEK_CAPITAL_LETTER_OMICRON;
00403 case 'P': return GREEK_CAPITAL_LETTER_PI;
00404 case 'R': return GREEK_CAPITAL_LETTER_RHO;
00405 case 'S': return GREEK_CAPITAL_LETTER_SIGMA;
00406 case 'T': return GREEK_CAPITAL_LETTER_TAU;
00407 case 'Y': return GREEK_CAPITAL_LETTER_UPSILON;
00408 case 'F': return GREEK_CAPITAL_LETTER_PHI;
00409 case 'C': return GREEK_CAPITAL_LETTER_CHI;
00410 case 'V': return GREEK_CAPITAL_LETTER_PSI;
00411 case 'W': return GREEK_CAPITAL_LETTER_OMEGA;
00412 /*
00413 case '': return GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA;
00414 case '': return GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA;
00415 case '': return GREEK_SMALL_LETTER_ALPHA_WITH_TONOS;
00416 case '': return GREEK_SMALL_LETTER_EPSILON_WITH_TONOS;
00417 case '': return GREEK_SMALL_LETTER_ETA_WITH_TONOS;
00418 case '': return GREEK_SMALL_LETTER_IOTA_WITH_TONOS;
00419 case '': return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS;
00420 */
00421 case 'a': return GREEK_SMALL_LETTER_ALPHA;
00422 case 'b': return GREEK_SMALL_LETTER_BETA;
00423 case 'g': return GREEK_SMALL_LETTER_GAMMA;
00424 case 'd': return GREEK_SMALL_LETTER_DELTA;
00425 case 'e': return GREEK_SMALL_LETTER_EPSILON;
00426 case 'z': return GREEK_SMALL_LETTER_ZETA;
00427 case 'h': return GREEK_SMALL_LETTER_ETA;
00428 case 'q': return GREEK_SMALL_LETTER_THETA;
00429 case 'i': return GREEK_SMALL_LETTER_IOTA;
00430 case 'k': return GREEK_SMALL_LETTER_KAPPA;
00431 case 'l': return GREEK_SMALL_LETTER_LAMDA;
00432 case 'm': return GREEK_SMALL_LETTER_MU;
00433 case 'n': return GREEK_SMALL_LETTER_NU;
00434 case 'x': return GREEK_SMALL_LETTER_XI;
00435 case 'o': return GREEK_SMALL_LETTER_OMICRON;
00436 case 'p': return GREEK_SMALL_LETTER_PI;
00437 case 'r': return GREEK_SMALL_LETTER_RHO;
00438 case '&': return GREEK_SMALL_LETTER_FINAL_SIGMA;
00439 case 's': return GREEK_SMALL_LETTER_SIGMA;
00440 case 't': return GREEK_SMALL_LETTER_TAU;
00441 case 'y': return GREEK_SMALL_LETTER_UPSILON;
00442 case 'f': return GREEK_SMALL_LETTER_PHI;
00443 case 'c': return GREEK_SMALL_LETTER_CHI;
00444 case 'v': return GREEK_SMALL_LETTER_PSI;
00445 case 'w': return GREEK_SMALL_LETTER_OMEGA;
00446 /*
00447 case '': return GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA;
00448 case '': return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA;
00449 case '': return GREEK_SMALL_LETTER_OMICRON_WITH_TONOS;
00450 case '': return GREEK_SMALL_LETTER_UPSILON_WITH_TONOS;
00451 case '': return GREEK_SMALL_LETTER_OMEGA_WITH_TONOS;
00452 case '': return GREEK_BETA_SYMBOL;
00453 case '': return GREEK_THETA_SYMBOL;
00454 case '': return GREEK_UPSILON_WITH_HOOK_SYMBOL;
00455 case '': return GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL;
00456 case '': return GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL;
00457 case '': return GREEK_PHI_SYMBOL;
00458 case '': return GREEK_PI_SYMBOL;
00459 */
00460 default: return (wchar_t)0;
00461 }
00462 break;
00463 default:
00464 return (wchar_t)0;
00465 }
00466 }
00467
00468 #ifdef DEPRECATED
00469 /* this function won't be provided anymore, but is kept here because it may
00470 be useful when writing outputFormatter modules */
00471 #define UNDEFINED "X"
00472
00473 /* Arguments: character in Unicode format, type of format to convert to.
00474 Returns: a string containing the Unicode character converted to the chosen
00475 format. This string is statically allocated and should not be freed. */
00476 const unsigned char *decode(wchar_t c, FORMAT type) {
00477 static unsigned char d;
00478 switch (type) {
00479 case ISO8859_1:
00480 if ( c <= 0xFF ) { /* UNICODE == IS08859-1 */
00481 d = (unsigned char)c;
00482 return &d;
00483 }
00484 switch (c) { /* not found in list, but perhaps we can describe it */
00485 /* todo: add greek. GREEK_SMALL_LETTER_ALPHA = alpha */
00486
00487 /* general puctuation */
00488 case HYPHEN:
00489 return (const unsigned char *)"-";
00490 case FIGURE_DASH:
00491 case EN_DASH:
00492 return (const unsigned char *)"--";
00493 case EM_DASH:
00494 return (const unsigned char *)"---";
00495 case LEFT_SINGLE_QUOTATION_MARK:
00496 return (const unsigned char *)"`";
00497 case RIGHT_SINGLE_QUOTATION_MARK:
00498 return (const unsigned char *)"'";
00499 case SINGLE_LOW_9_QUOTATION_MARK:
00500 return (const unsigned char *)",";
00501 case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
00502 return (const unsigned char *)UNDEFINED;
00503 case LEFT_DOUBLE_QUOTATION_MARK:
00504 return (const unsigned char *)"``";
00505 case RIGHT_DOUBLE_QUOTATION_MARK:
00506 return (const unsigned char *)"''";
00507 case DOUBLE_LOW_9_QUOTATION_MARK:
00508 return (const unsigned char *)",,";
00509 case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
00510 return (const unsigned char *)UNDEFINED;
00511 case DAGGER:
00512 return (const unsigned char *)"+";
00513 case DOUBLE_DAGGER:
00514 return (const unsigned char *)"*";
00515 case BULLET:
00516 return (const unsigned char *)"*";
00517 case TRIANGULAR_BULLET:
00518 return (const unsigned char *)"*";
00519 case HYPHENATION_POINT:
00520 return (const unsigned char *)"-";
00521 case HORIZONTAL_ELLIPSIS:
00522 return (const unsigned char *)"...";
00523 case PER_MILLE_SIGN:
00524 return (const unsigned char *)"%%"; /* awk! */
00525 case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
00526 return (const unsigned char *)"<";
00527 case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
00528 return (const unsigned char *)">";
00529
00530 /* ligatures */
00531 case LATIN_SMALL_LIGATURE_FF:
00532 return (const unsigned char *)"ff";
00533 case LATIN_SMALL_LIGATURE_FI:
00534 return (const unsigned char *)"fi";
00535 case LATIN_SMALL_LIGATURE_FL:
00536 return (const unsigned char *)"fl";
00537 case LATIN_SMALL_LIGATURE_FFI:
00538 return (const unsigned char *)"ffi";
00539 case LATIN_SMALL_LIGATURE_FFL:
00540 return (const unsigned char *)"ffl";
00541 case LATIN_SMALL_LIGATURE_LONG_S_T:
00542 case LATIN_SMALL_LIGATURE_ST:
00543 return (const unsigned char *)"st";
00544
00545 /* extra */
00546 case UNKNOWN:
00547 return (const unsigned char *)"X";
00548 case PICTURE:
00549 return (const unsigned char *)"PICTURE";
00550
00551
00552 default:
00553 return (const unsigned char *)UNDEFINED;
00554 }
00555 break;
00556 case TeX:
00557 if ( c >= SPACE && c <= TILDE ) { /* ASCII */
00558 switch (c) {
00559 case '$':
00560 return (const unsigned char *)"\\$";
00561 case '&':
00562 return (const unsigned char *)"\\&";
00563 case '%':
00564 return (const unsigned char *)"\\%";
00565 case '#':
00566 return (const unsigned char *)"\\#";
00567 case '_':
00568 return (const unsigned char *)"\\_";
00569 case '{':
00570 return (const unsigned char *)"\\{";
00571 case '}':
00572 return (const unsigned char *)"\\}";
00573 case '\\':
00574 return (const unsigned char *)"$\\backslash$";
00575 case '~':
00576 return (const unsigned char *)"\\~{}";
00577 case '^':
00578 return (const unsigned char *)"\\^{}";
00579 default:
00580 d = (unsigned char)c;
00581 return (const unsigned char *)&d;
00582 }
00583 }
00584 switch (c) {
00585 /* ISO8859_1 */
00586 case NO_BREAK_SPACE:
00587 return (const unsigned char *)"~";
00588 case INVERTED_EXCLAMATION_MARK:
00589 return (const unsigned char *)"!'";
00590 case CENT_SIGN:
00591 return (const unsigned char *)UNDEFINED;
00592 case POUND_SIGN:
00593 return (const unsigned char *)"\\pounds";
00594 case CURRENCY_SIGN:
00595 return (const unsigned char *)UNDEFINED;
00596 case YEN_SIGN:
00597 return (const unsigned char *)UNDEFINED;
00598 case BROKEN_BAR:
00599 return (const unsigned char *)UNDEFINED;
00600 case SECTION_SIGN:
00601 return (const unsigned char *)"\\S";
00602 case DIAERESIS:
00603 return (const unsigned char *)"\"";
00604 case COPYRIGHT_SIGN:
00605 return (const unsigned char *)"\\copyright";
00606 case FEMININE_ORDINAL_INDICATOR:
00607 return (const unsigned char *)"$^{\\underbar{a}}$";
00608 case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
00609 return (const unsigned char *)"\\flqq{}";
00610 case NOT_SIGN:
00611 return (const unsigned char *)"$\\lnot$";
00612 case SOFT_HYPHEN:
00613 return (const unsigned char *)"\\-";
00614 case REGISTERED_SIGN:
00615 return (const unsigned char *)UNDEFINED;
00616 case MACRON:
00617 return (const unsigned char *)UNDEFINED;
00618 case DEGREE_SIGN:
00619 return (const unsigned char *)"$^{o}$";
00620 case PLUS_MINUS_SIGN:
00621 return (const unsigned char *)"$\\pm$";
00622 case SUPERSCRIPT_TWO:
00623 return (const unsigned char *)"$^{2}$";
00624 case SUPERSCRIPT_THREE:
00625 return (const unsigned char *)"$^{3}$";
00626 case ACUTE_ACCENT:
00627 return (const unsigned char *)"\\( \\prime \\)";
00628 case MICRO_SIGN:
00629 return (const unsigned char *)"$\\mu$";
00630 case PILCROW_SIGN:
00631 return (const unsigned char *)"\\P";
00632 case MIDDLE_DOT:
00633 return (const unsigned char *)"$\\cdot$";
00634 case CEDILLA:
00635 return (const unsigned char *)"\\,";
00636 case SUPERSCRIPT_ONE:
00637 return (const unsigned char *)"$^{1}$";
00638 case MASCULINE_ORDINAL_INDICATOR:
00639 return (const unsigned char *)"$^{\\underbar{o}}$";
00640 case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
00641 return (const unsigned char *)"\\frqq{}";
00642 case VULGAR_FRACTION_ONE_QUARTER:
00643 return (const unsigned char *)"\\( 1\\over 4 \\)";
00644 case VULGAR_FRACTION_ONE_HALF:
00645 return (const unsigned char *)"\\( 1\\over 2 \\)";
00646 case VULGAR_FRACTION_THREE_QUARTERS:
00647 return (const unsigned char *)"\\( 3\\over 4 \\)";
00648 case INVERTED_QUESTION_MARK:
00649 return (const unsigned char *)"?'";
00650 case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
00651 return (const unsigned char *)"\\`A";
00652 case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
00653 return (const unsigned char *)"\\'A";
00654 case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
00655 return (const unsigned char *)"\\^A";
00656 case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
00657 return (const unsigned char *)"\\~A";
00658 case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
00659 return (const unsigned char *)"\\\"A";
00660 case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
00661 return (const unsigned char *)"\\AA";
00662 case LATIN_CAPITAL_LETTER_AE:
00663 return (const unsigned char *)"\\AE";
00664 case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
00665 return (const unsigned char *)"\\C";
00666 case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
00667 return (const unsigned char *)"\\`E";
00668 case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
00669 return (const unsigned char *)"\\'E";
00670 case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
00671 return (const unsigned char *)"\\^E";
00672 case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
00673 return (const unsigned char *)"\\\"E";
00674 case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
00675 return (const unsigned char *)"\\`I";
00676 case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
00677 return (const unsigned char *)"\\'I";
00678 case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
00679 return (const unsigned char *)"\\^I";
00680 case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
00681 return (const unsigned char *)"\\\"I";
00682 case LATIN_CAPITAL_LETTER_ETH:
00683 return (const unsigned char *)UNDEFINED;
00684 case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
00685 return (const unsigned char *)"\\~N";
00686 case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
00687 return (const unsigned char *)"\\`O";
00688 case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
00689 return (const unsigned char *)"\\'O";
00690 case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
00691 return (const unsigned char *)"\\^O";
00692 case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
00693 return (const unsigned char *)"\\~O";
00694 case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
00695 return (const unsigned char *)"\\\"O";
00696 case MULTIPLICATION_SIGN:
00697 return (const unsigned char *)"$\\times$";
00698 case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
00699 return (const unsigned char *)"\\O";
00700 case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
00701 return (const unsigned char *)"\\`U";
00702 case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
00703 return (const unsigned char *)"\\'U";
00704 case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
00705 return (const unsigned char *)"\\^U";
00706 case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
00707 return (const unsigned char *)"\\\"U";
00708 case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
00709 return (const unsigned char *)"\\'Y";
00710 case LATIN_CAPITAL_LETTER_THORN:
00711 return (const unsigned char *)UNDEFINED;
00712 case LATIN_SMALL_LETTER_SHARP_S:
00713 return (const unsigned char *)"\\ss";
00714 case LATIN_SMALL_LETTER_A_WITH_GRAVE:
00715 return (const unsigned char *)"\\`a";
00716 case LATIN_SMALL_LETTER_A_WITH_ACUTE:
00717 return (const unsigned char *)"\\'a";
00718 case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
00719 return (const unsigned char *)"\\^a";
00720 case LATIN_SMALL_LETTER_A_WITH_TILDE:
00721 return (const unsigned char *)"\\~a";
00722 case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
00723 return (const unsigned char *)"\\\"a";
00724 case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
00725 return (const unsigned char *)"\\aa";
00726 case LATIN_SMALL_LETTER_AE:
00727 return (const unsigned char *)"\\ae";
00728 case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
00729 return (const unsigned char *)"\\c";
00730 case LATIN_SMALL_LETTER_E_WITH_GRAVE:
00731 return (const unsigned char *)"\\`e";
00732 case LATIN_SMALL_LETTER_E_WITH_ACUTE:
00733 return (const unsigned char *)"\\'e";
00734 case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
00735 return (const unsigned char *)"\\^e";
00736 case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
00737 return (const unsigned char *)"\\\"e";
00738 case LATIN_SMALL_LETTER_I_WITH_GRAVE:
00739 return (const unsigned char *)"\\`i";
00740 case LATIN_SMALL_LETTER_I_WITH_ACUTE:
00741 return (const unsigned char *)"\\'i";
00742 case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
00743 return (const unsigned char *)"\\^i";
00744 case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
00745 return (const unsigned char *)"\\\"i";
00746 case LATIN_SMALL_LETTER_ETH:
00747 return (const unsigned char *)UNDEFINED;
00748 case LATIN_SMALL_LETTER_N_WITH_TILDE:
00749 return (const unsigned char *)"\\~n";
00750 case LATIN_SMALL_LETTER_O_WITH_GRAVE:
00751 return (const unsigned char *)"\\`o";
00752 case LATIN_SMALL_LETTER_O_WITH_ACUTE:
00753 return (const unsigned char *)"\\'o";
00754 case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
00755 return (const unsigned char *)"\\^o";
00756 case LATIN_SMALL_LETTER_O_WITH_TILDE:
00757 return (const unsigned char *)"\\~o";
00758 case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
00759 return (const unsigned char *)"\\\"o";
00760 case DIVISION_SIGN:
00761 return (const unsigned char *)"$\\div$";
00762 case LATIN_SMALL_LETTER_O_WITH_STROKE:
00763 return (const unsigned char *)"\\o";
00764 case LATIN_SMALL_LETTER_U_WITH_GRAVE:
00765 return (const unsigned char *)"\\`u";
00766 case LATIN_SMALL_LETTER_U_WITH_ACUTE:
00767 return (const unsigned char *)"\\'u";
00768 case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
00769 return (const unsigned char *)"\\^u";
00770 case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
00771 return (const unsigned char *)"\\\"u";
00772 case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
00773 return (const unsigned char *)"\\'y";
00774 case LATIN_SMALL_LETTER_THORN:
00775 return (const unsigned char *)UNDEFINED;
00776 case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
00777 return (const unsigned char *)"\\\"y";
00778
00779
00780
00781 case GREEK_CAPITAL_LETTER_ALPHA:
00782 return (const unsigned char *)"A";
00783 case GREEK_CAPITAL_LETTER_BETA:
00784 return (const unsigned char *)"B";
00785 case GREEK_CAPITAL_LETTER_GAMMA:
00786 return (const unsigned char *)"\\( \\Gamma \\)";
00787 case GREEK_CAPITAL_LETTER_DELTA:
00788 return (const unsigned char *)"\\( \\Delta \\)";
00789 case GREEK_CAPITAL_LETTER_EPSILON:
00790 return (const unsigned char *)"E";
00791 case GREEK_CAPITAL_LETTER_ZETA:
00792 return (const unsigned char *)"Z";
00793 case GREEK_CAPITAL_LETTER_ETA:
00794 return (const unsigned char *)"H";
00795 case GREEK_CAPITAL_LETTER_THETA:
00796 return (const unsigned char *)"\\( \\Theta \\)";
00797 case GREEK_CAPITAL_LETTER_IOTA:
00798 return (const unsigned char *)"I";
00799 case GREEK_CAPITAL_LETTER_KAPPA:
00800 return (const unsigned char *)"K";
00801 case GREEK_CAPITAL_LETTER_LAMDA:
00802 return (const unsigned char *)"\\( \\Lambda \\)";
00803 case GREEK_CAPITAL_LETTER_MU:
00804 return (const unsigned char *)"M";
00805 case GREEK_CAPITAL_LETTER_NU:
00806 return (const unsigned char *)"N";
00807 case GREEK_CAPITAL_LETTER_XI:
00808 return (const unsigned char *)"\\( \\Xi \\)";
00809 case GREEK_CAPITAL_LETTER_OMICRON:
00810 return (const unsigned char *)"O";
00811 case GREEK_CAPITAL_LETTER_PI:
00812 return (const unsigned char *)"\\( \\Pi \\)";
00813 case GREEK_CAPITAL_LETTER_RHO:
00814 return (const unsigned char *)"P";
00815 case GREEK_CAPITAL_LETTER_SIGMA:
00816 return (const unsigned char *)"\\( \\Sigma \\)";
00817 case GREEK_CAPITAL_LETTER_TAU:
00818 return (const unsigned char *)"T";
00819 case GREEK_CAPITAL_LETTER_UPSILON:
00820 return (const unsigned char *)"\\( \\Upsilon \\)";
00821 case GREEK_CAPITAL_LETTER_PHI:
00822 return (const unsigned char *)"\\( \\Phi \\)";
00823 case GREEK_CAPITAL_LETTER_CHI:
00824 return (const unsigned char *)UNDEFINED;
00825 case GREEK_CAPITAL_LETTER_PSI:
00826 return (const unsigned char *)"\\( \\Psi \\)";
00827 case GREEK_CAPITAL_LETTER_OMEGA:
00828 return (const unsigned char *)"\\( \\Omega \\)";
00829 case GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA:
00830 return (const unsigned char *)UNDEFINED;
00831 case GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA:
00832 return (const unsigned char *)UNDEFINED;
00833 case GREEK_SMALL_LETTER_ALPHA_WITH_TONOS:
00834 return (const unsigned char *)UNDEFINED;
00835 case GREEK_SMALL_LETTER_EPSILON_WITH_TONOS:
00836 return (const unsigned char *)UNDEFINED;
00837 case GREEK_SMALL_LETTER_ETA_WITH_TONOS:
00838 return (const unsigned char *)UNDEFINED;
00839 case GREEK_SMALL_LETTER_IOTA_WITH_TONOS:
00840 return (const unsigned char *)UNDEFINED;
00841 case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS:
00842 return (const unsigned char *)UNDEFINED;
00843 case GREEK_SMALL_LETTER_ALPHA:
00844 return (const unsigned char *)"\\( \\alpha \\)";
00845 case GREEK_SMALL_LETTER_BETA:
00846 return (const unsigned char *)"\\( \\beta \\)";
00847 case GREEK_SMALL_LETTER_GAMMA:
00848 return (const unsigned char *)"\\( \\gamma \\)";
00849 case GREEK_SMALL_LETTER_DELTA:
00850 return (const unsigned char *)"\\( \\delta \\)";
00851 case GREEK_SMALL_LETTER_EPSILON:
00852 return (const unsigned char *)"\\( \\epsilon \\)";
00853 case GREEK_SMALL_LETTER_ZETA:
00854 return (const unsigned char *)"\\( \\zeta \\)";
00855 case GREEK_SMALL_LETTER_ETA:
00856 return (const unsigned char *)"\\( \\eta \\)";
00857 case GREEK_SMALL_LETTER_THETA:
00858 return (const unsigned char *)"\\( \\theta \\)";
00859 case GREEK_SMALL_LETTER_IOTA:
00860 return (const unsigned char *)"\\( \\iota \\)";
00861 case GREEK_SMALL_LETTER_KAPPA:
00862 return (const unsigned char *)"\\( \\kappa \\)";
00863 case GREEK_SMALL_LETTER_LAMDA:
00864 return (const unsigned char *)"\\( \\lambda \\)";
00865 case GREEK_SMALL_LETTER_MU:
00866 return (const unsigned char *)"\\( \\mu \\)";
00867 case GREEK_SMALL_LETTER_NU:
00868 return (const unsigned char *)"\\( \\nu \\)";
00869 case GREEK_SMALL_LETTER_XI:
00870 return (const unsigned char *)"\\( \\xi \\)";
00871 case GREEK_SMALL_LETTER_OMICRON:
00872 return (const unsigned char *)"\\( \\omicron \\)";
00873 case GREEK_SMALL_LETTER_PI:
00874 return (const unsigned char *)"\\( \\pi \\)";
00875 case GREEK_SMALL_LETTER_RHO:
00876 return (const unsigned char *)"\\( \\rho \\)";
00877 case GREEK_SMALL_LETTER_FINAL_SIGMA:
00878 return (const unsigned char *)"\\( \\varsigma \\)";
00879 case GREEK_SMALL_LETTER_SIGMA:
00880 return (const unsigned char *)"\\( \\sigma \\)";
00881 case GREEK_SMALL_LETTER_TAU:
00882 return (const unsigned char *)"\\( \\tau \\)";
00883 case GREEK_SMALL_LETTER_UPSILON:
00884 return (const unsigned char *)"\\( \\upsilon \\)";
00885 case GREEK_SMALL_LETTER_PHI:
00886 return (const unsigned char *)"\\( \\varphi \\)";
00887 case GREEK_SMALL_LETTER_CHI:
00888 return (const unsigned char *)"\\( \\chi \\)";
00889 case GREEK_SMALL_LETTER_PSI:
00890 return (const unsigned char *)"\\( \\psi \\)";
00891 case GREEK_SMALL_LETTER_OMEGA:
00892 return (const unsigned char *)"\\( \\omega \\)";
00893 case GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA:
00894 return (const unsigned char *)UNDEFINED;
00895 case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA:
00896 return (const unsigned char *)UNDEFINED;
00897 case GREEK_SMALL_LETTER_OMICRON_WITH_TONOS:
00898 return (const unsigned char *)UNDEFINED;
00899 case GREEK_SMALL_LETTER_UPSILON_WITH_TONOS:
00900 return (const unsigned char *)UNDEFINED;
00901 case GREEK_SMALL_LETTER_OMEGA_WITH_TONOS:
00902 return (const unsigned char *)UNDEFINED;
00903 case GREEK_BETA_SYMBOL:
00904 return (const unsigned char *)UNDEFINED;
00905 case GREEK_THETA_SYMBOL:
00906 return (const unsigned char *)"\\( \\vartheta \\)";
00907 case GREEK_UPSILON_WITH_HOOK_SYMBOL:
00908 return (const unsigned char *)UNDEFINED;
00909 case GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL:
00910 return (const unsigned char *)UNDEFINED;
00911 case GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL:
00912 return (const unsigned char *)UNDEFINED;
00913 case GREEK_PHI_SYMBOL:
00914 return (const unsigned char *)"\\( \\phi \\)";
00915 case GREEK_PI_SYMBOL:
00916 return (const unsigned char *)"\\( \\varpi \\)";
00917
00918
00919
00920 case HYPHEN:
00921 return (const unsigned char *)"-";
00922 case NON_BREAKING_HYPHEN:
00923 return (const unsigned char *)UNDEFINED;
00924 case FIGURE_DASH:
00925 case EN_DASH:
00926 return (const unsigned char *)"--";
00927 case EM_DASH:
00928 return (const unsigned char *)"---";
00929 case HORIZONTAL_BAR:
00930 return (const unsigned char *)UNDEFINED;
00931 case LEFT_SINGLE_QUOTATION_MARK:
00932 return (const unsigned char *)"`";
00933 case RIGHT_SINGLE_QUOTATION_MARK:
00934 return (const unsigned char *)"'";
00935 case SINGLE_LOW_9_QUOTATION_MARK:
00936 return (const unsigned char *)"\\glq{}";
00937 case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
00938 return (const unsigned char *)UNDEFINED;
00939 case LEFT_DOUBLE_QUOTATION_MARK:
00940 return (const unsigned char *)"``";
00941 case RIGHT_DOUBLE_QUOTATION_MARK:
00942 return (const unsigned char *)"''";
00943 case DOUBLE_LOW_9_QUOTATION_MARK:
00944 return (const unsigned char *)"\\glqq{}";
00945 case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
00946 return (const unsigned char *)UNDEFINED;
00947 case DAGGER:
00948 return (const unsigned char *)"\\dag";
00949 case DOUBLE_DAGGER:
00950 return (const unsigned char *)"\\ddag";
00951 case BULLET:
00952 return (const unsigned char *)"$\\bullet$";
00953 case TRIANGULAR_BULLET:
00954 return (const unsigned char *)"$\\blacktriangleright";
00955 case HYPHENATION_POINT:
00956 return (const unsigned char *)"\\-";
00957 case HORIZONTAL_ELLIPSIS:
00958 return (const unsigned char *)"\\ldots";
00959 case PER_MILLE_SIGN:
00960 return (const unsigned char *)UNDEFINED;
00961 case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
00962 return (const unsigned char *)"\\flq{}";
00963 case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
00964 return (const unsigned char *)"\\frq{}";
00965
00966 case LATIN_SMALL_LIGATURE_FF:
00967 return (const unsigned char *)"ff";
00968 case LATIN_SMALL_LIGATURE_FI:
00969 return (const unsigned char *)"fi";
00970 case LATIN_SMALL_LIGATURE_FL:
00971 return (const unsigned char *)"fl";
00972 case LATIN_SMALL_LIGATURE_FFI:
00973 return (const unsigned char *)"ffi";
00974 case LATIN_SMALL_LIGATURE_FFL:
00975 return (const unsigned char *)"ffl";
00976 case LATIN_SMALL_LIGATURE_LONG_S_T:
00977 case LATIN_SMALL_LIGATURE_ST:
00978 return (const unsigned char *)"st";
00979
00980 case UNKNOWN:
00981 return (const unsigned char *)"X";
00982 case PICTURE:
00983 return (const unsigned char *)"PICTURE";
00984 default:
00985 return (const unsigned char *)UNDEFINED;
00986 }
00987 case HTML:
00988 if ( c >= SPACE && c <= TILDE ) {
00989 d = (unsigned char)c;
00990 return &d;
00991 }
00992 switch (c) {
00993 case FORM_FEED:
00994 case CARRIAGE_RETURN:
00995 return (const unsigned char *)"<br>";
00996 case NO_BREAK_SPACE:
00997 return (const unsigned char *)"<nobr>";
00998 case INVERTED_EXCLAMATION_MARK:
00999 return (const unsigned char *)"¡";
01000 case CENT_SIGN:
01001 return (const unsigned char *)"¢";
01002 case POUND_SIGN:
01003 return (const unsigned char *)"£";
01004 case CURRENCY_SIGN:
01005 return (const unsigned char *)"¤";
01006 case YEN_SIGN:
01007 return (const unsigned char *)"¥";
01008 case BROKEN_BAR:
01009 return (const unsigned char *)"¦";
01010 case SECTION_SIGN:
01011 return (const unsigned char *)"§";
01012 case DIAERESIS:
01013 return (const unsigned char *)"¨";
01014 case COPYRIGHT_SIGN:
01015 return (const unsigned char *)"©";
01016 case FEMININE_ORDINAL_INDICATOR:
01017 return (const unsigned char *)"ªem;";
01018 case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
01019 return (const unsigned char *)"«";
01020 case NOT_SIGN:
01021 return (const unsigned char *)"¬";
01022 case SOFT_HYPHEN:
01023 return (const unsigned char *)"­";
01024 case REGISTERED_SIGN:
01025 return (const unsigned char *)"®";
01026 case MACRON:
01027 return (const unsigned char *)"¯";
01028 case DEGREE_SIGN:
01029 return (const unsigned char *)"°";
01030 case PLUS_MINUS_SIGN:
01031 return (const unsigned char *)"±";
01032 case SUPERSCRIPT_TWO:
01033 return (const unsigned char *)"²";
01034 case SUPERSCRIPT_THREE:
01035 return (const unsigned char *)"³";
01036 case ACUTE_ACCENT:
01037 return (const unsigned char *)"´";
01038 case MICRO_SIGN:
01039 return (const unsigned char *)"µ";
01040 case PILCROW_SIGN:
01041 return (const unsigned char *)"¶";
01042 case MIDDLE_DOT:
01043 return (const unsigned char *)"·";
01044 case CEDILLA:
01045 return (const unsigned char *)"¸";
01046 case SUPERSCRIPT_ONE:
01047 return (const unsigned char *)"¹";
01048 case MASCULINE_ORDINAL_INDICATOR:
01049 return (const unsigned char *)"º";
01050 case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
01051 return (const unsigned char *)"»";
01052 case VULGAR_FRACTION_ONE_QUARTER:
01053 return (const unsigned char *)"¼";
01054 case VULGAR_FRACTION_ONE_HALF:
01055 return (const unsigned char *)"½";
01056 case VULGAR_FRACTION_THREE_QUARTERS:
01057 return (const unsigned char *)"¾";
01058 case INVERTED_QUESTION_MARK:
01059 return (const unsigned char *)"¿";
01060 case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
01061 return (const unsigned char *)"À";
01062 case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
01063 return (const unsigned char *)"Á";
01064 case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
01065 return (const unsigned char *)"Â";
01066 case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
01067 return (const unsigned char *)"Ã";
01068 case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
01069 return (const unsigned char *)"Ä";
01070 case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
01071 return (const unsigned char *)"Å";
01072 case LATIN_CAPITAL_LETTER_AE:
01073 return (const unsigned char *)"Æ";
01074 case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
01075 return (const unsigned char *)"Ç";
01076 case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
01077 return (const unsigned char *)"È";
01078 case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
01079 return (const unsigned char *)"É";
01080 case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
01081 return (const unsigned char *)"Ê";
01082 case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
01083 return (const unsigned char *)"Ë";
01084 case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
01085 return (const unsigned char *)"Ì";
01086 case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
01087 return (const unsigned char *)"Í";
01088 case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
01089 return (const unsigned char *)"Î";
01090 case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
01091 return (const unsigned char *)"Ï";
01092 case LATIN_CAPITAL_LETTER_ETH:
01093 return (const unsigned char *)"Ð";
01094 case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
01095 return (const unsigned char *)"Ñ";
01096 case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
01097 return (const unsigned char *)"Ò";
01098 case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
01099 return (const unsigned char *)"Ó";
01100 case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
01101 return (const unsigned char *)"Ô";
01102 case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
01103 return (const unsigned char *)"Õ";
01104 case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
01105 return (const unsigned char *)"Ö";
01106 case MULTIPLICATION_SIGN:
01107 return (const unsigned char *)"×";
01108 case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
01109 return (const unsigned char *)"Ø";
01110 case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
01111 return (const unsigned char *)"Ù";
01112 case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
01113 return (const unsigned char *)"Ú";
01114 case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
01115 return (const unsigned char *)"Û";
01116 case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
01117 return (const unsigned char *)"Ü";
01118 case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
01119 return (const unsigned char *)"Ý";
01120 case LATIN_CAPITAL_LETTER_THORN:
01121 return (const unsigned char *)"Þ";
01122 case LATIN_SMALL_LETTER_SHARP_S:
01123 return (const unsigned char *)"ß";
01124 case LATIN_SMALL_LETTER_A_WITH_GRAVE:
01125 return (const unsigned char *)"à";
01126 case LATIN_SMALL_LETTER_A_WITH_ACUTE:
01127 return (const unsigned char *)"´";
01128 case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
01129 return (const unsigned char *)"â";
01130 case LATIN_SMALL_LETTER_A_WITH_TILDE:
01131 return (const unsigned char *)"ã";
01132 case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
01133 return (const unsigned char *)"ä";
01134 case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
01135 return (const unsigned char *)"å";
01136 case LATIN_SMALL_LETTER_AE:
01137 return (const unsigned char *)"æ";
01138 case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
01139 return (const unsigned char *)"ç";
01140 case LATIN_SMALL_LETTER_E_WITH_GRAVE:
01141 return (const unsigned char *)"è";
01142 case LATIN_SMALL_LETTER_E_WITH_ACUTE:
01143 return (const unsigned char *)"é";
01144 case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
01145 return (const unsigned char *)"ê";
01146 case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
01147 return (const unsigned char *)"ë";
01148 case LATIN_SMALL_LETTER_I_WITH_GRAVE:
01149 return (const unsigned char *)"ì";
01150 case LATIN_SMALL_LETTER_I_WITH_ACUTE:
01151 return (const unsigned char *)"í";
01152 case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
01153 return (const unsigned char *)"î";
01154 case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
01155 return (const unsigned char *)"ï";
01156 case LATIN_SMALL_LETTER_ETH:
01157 return (const unsigned char *)"ð";
01158 case LATIN_SMALL_LETTER_N_WITH_TILDE:
01159 return (const unsigned char *)"ñ";
01160 case LATIN_SMALL_LETTER_O_WITH_GRAVE:
01161 return (const unsigned char *)"ò";
01162 case LATIN_SMALL_LETTER_O_WITH_ACUTE:
01163 return (const unsigned char *)"ó";
01164 case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
01165 return (const unsigned char *)"ô";
01166 case LATIN_SMALL_LETTER_O_WITH_TILDE:
01167 return (const unsigned char *)"õ";
01168 case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
01169 return (const unsigned char *)"ö";
01170 case DIVISION_SIGN:
01171 return (const unsigned char *)"÷";
01172 case LATIN_SMALL_LETTER_O_WITH_STROKE:
01173 return (const unsigned char *)"ø";
01174 case LATIN_SMALL_LETTER_U_WITH_GRAVE:
01175 return (const unsigned char *)"ù";
01176 case LATIN_SMALL_LETTER_U_WITH_ACUTE:
01177 return (const unsigned char *)"ú";
01178 case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
01179 return (const unsigned char *)"û";
01180 case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
01181 return (const unsigned char *)"ü";
01182 case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
01183 return (const unsigned char *)"ý";
01184 case LATIN_SMALL_LETTER_THORN:
01185 return (const unsigned char *)"þ";
01186 case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
01187 return (const unsigned char *)"ÿ";
01188 default:
01189 return (const unsigned char *)UNDEFINED;
01190 }
01191 break;
01192 case SGML:
01193 switch (c) {
01194 default:
01195 return (const unsigned char *)UNDEFINED;
01196 }
01197 break;
01198 default:
01199 return (const unsigned char *)NULL;
01200 }
01201 }
01202 #endif