From a888c34dbb345f561fe901f2deb1472a1096d633 Mon Sep 17 00:00:00 2001
From: Thomas Porzelt
Date: Thu, 5 Mar 2009 00:19:55 +0100
Subject: [PATCH] add encoding functions

---
Reviewer notes on this revision of the patch:
 - string_encode() now sets the length of the returned GString, rejects a
   NULL string and unknown encodings, and the HTML map also covers the
   double and single quote characters.
 - The targets of the unchanged #include context lines in the base.h hunk
   and the original indentation were lost in the captured patch text; those
   context lines are left as found and tab indentation is assumed. Restore
   them from the tree before applying. The index lines still carry the blob
   ids of the first submission.

 include/lighttpd/base.h     |   1 +
 include/lighttpd/encoding.h |  21 ++++++
 src/encoding.c              | 130 ++++++++++++++++++++++++++++++++++++
 src/wscript                 |   1 +
 4 files changed, 153 insertions(+)
 create mode 100644 include/lighttpd/encoding.h
 create mode 100644 src/encoding.c

diff --git a/include/lighttpd/base.h b/include/lighttpd/base.h
index b0a418d..1d0204a 100644
--- a/include/lighttpd/base.h
+++ b/include/lighttpd/base.h
@@ -51,6 +51,7 @@
 #include
 #include
 #include
+#include <lighttpd/encoding.h>
 #include
 
 #define SERVER_VERSION ((guint) 0x01FF0000)
diff --git a/include/lighttpd/encoding.h b/include/lighttpd/encoding.h
new file mode 100644
index 0000000..fe0ad82
--- /dev/null
+++ b/include/lighttpd/encoding.h
@@ -0,0 +1,21 @@
+#ifndef _LIGHTTPD_ENCODING_H_
+#define _LIGHTTPD_ENCODING_H_
+
+#ifndef _LIGHTTPD_BASE_H_
+#error Please include <lighttpd/base.h> instead of this file
+#endif
+
+typedef enum {
+	ENCODING_HEX, /* every byte becomes two lowercase hex digits: a => 61 */
+	ENCODING_HTML /* special bytes become numeric references: & => &#x26; */
+} encoding_t;
+
+
+/*
+ * Encodes the NUL-terminated string str and returns the result as a newly
+ * allocated GString that the caller owns.
+ * Returns NULL if str is NULL or encoding is not a known encoding_t value.
+ */
+GString *string_encode(const gchar *str, encoding_t encoding);
+
+#endif
diff --git a/src/encoding.c b/src/encoding.c
new file mode 100644
index 0000000..9a85656
--- /dev/null
+++ b/src/encoding.c
@@ -0,0 +1,130 @@
+#include <lighttpd/base.h>
+
+static const gchar hex_chars[] = "0123456789abcdef";
+
+/*
+ * Lookup tables indexed by byte value: a non-zero entry means the byte is
+ * replaced by its encoded form, zero means it is copied through unchanged.
+ */
+
+/* HEX: every byte is encoded */
+static const gchar encode_map_hex[] = {
+	/*
+	0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+	*/
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00 - 0F */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 10 - 1F */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 30 - 3F */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 70 - 7F */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 80 - 8F */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 90 - 9F */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* A0 - AF */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* B0 - BF */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* C0 - CF */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* D0 - DF */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* E0 - EF */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* F0 - FF */
+};
+
+/* HTML: control chars, " & ' < >, DEL and all bytes >= 0x80 are encoded */
+static const gchar encode_map_html[] = {
+	/*
+	0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+	*/
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00 - 0F control chars */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 10 - 1F */
+	0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 20 - 2F " & ' */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, /* 30 - 3F < > */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40 - 4F */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 50 - 5F */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60 - 6F */
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 70 - 7F DEL */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 80 - 8F */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 90 - 9F */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* A0 - AF */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* B0 - BF */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* C0 - CF */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* D0 - DF */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* E0 - EF */
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* F0 - FF */
+};
+
+/*
+ * Returns a newly allocated GString holding the encoded form of str.
+ * Bytes flagged in the map for the chosen encoding are written as "HH"
+ * (ENCODING_HEX) or "&#xHH;" (ENCODING_HTML), HH being the byte value in
+ * lowercase hex; all other bytes are copied as they are.
+ * Returns NULL if str is NULL or encoding is not a known encoding_t value.
+ */
+GString *string_encode(const gchar *str, encoding_t encoding) {
+	GString *result;
+	const guchar *c;
+	guchar *pos;
+	gsize new_len = 0;
+	guint encoded_len;
+	const gchar *map;
+
+	if (NULL == str)
+		return NULL;
+
+	switch (encoding) {
+	case ENCODING_HTML:
+		map = encode_map_html;
+		encoded_len = 6; /* "&#xHH;" */
+		break;
+	case ENCODING_HEX:
+		map = encode_map_hex;
+		encoded_len = 2; /* "HH" */
+		break;
+	default:
+		/* unknown encoding: no map to consult */
+		return NULL;
+	}
+
+	/* first pass: work out the length of the encoded string */
+	for (c = (const guchar*)str; *c != '\0'; c++) {
+		if (map[*c])
+			new_len += encoded_len;
+		else
+			new_len++;
+	}
+
+	/*
+	 * The bytes are written straight into the buffer below, which GString
+	 * does not notice; set the length up front so result->len matches the
+	 * content and the terminating NUL is in place.
+	 */
+	result = g_string_sized_new(new_len);
+	g_string_set_size(result, new_len);
+
+	/* second pass: fill in the encoded bytes */
+	for (c = (const guchar*)str, pos = (guchar*)result->str; *c != '\0'; c++) {
+		if (!map[*c]) {
+			/* no encoding needed */
+			*pos++ = *c;
+			continue;
+		}
+
+		switch (encoding) {
+		case ENCODING_HTML:
+			/* &#xHH; */
+			*pos++ = '&';
+			*pos++ = '#';
+			*pos++ = 'x';
+			*pos++ = hex_chars[((*c) >> 4) & 0x0F];
+			*pos++ = hex_chars[(*c) & 0x0F];
+			*pos++ = ';';
+			break;
+		case ENCODING_HEX:
+			*pos++ = hex_chars[((*c) >> 4) & 0x0F];
+			*pos++ = hex_chars[(*c) & 0x0F];
+			break;
+		}
+	}
+
+	return result;
+}
diff --git a/src/wscript b/src/wscript
index 2e04162..3d91507 100644
--- a/src/wscript
+++ b/src/wscript
@@ -26,6 +26,7 @@ common_src = '''
 	condition_parsers.rl
 	config_parser.rl
 	connection.c
+	encoding.c
 	environment.c
 	filter_chunked.c
 	http_headers.c