123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109 |
- /*
- punycode.c from RFC 3492
- http://www.nicemice.net/idn/
- Adam M. Costello
- http://www.nicemice.net/amc/
- This is ANSI C code (C89) implementing Punycode (RFC 3492).
- C. Disclaimer and license
- Regarding this entire document or any portion of it (including
- the pseudocode and C code), the author makes no guarantees and
- is not responsible for any damage resulting from its use. The
- author grants irrevocable permission to anyone to use, modify,
- and distribute it in any way that does not diminish the rights
- of anyone else to use, modify, and distribute it, provided that
- redistributed derivative works do not contain misleading author or
- version information. Derivative works need not be licensed under
- similar terms.
- */
- #ifdef __cplusplus
- extern "C" {
- #endif /* __cplusplus */
- /************************************************************/
- /* Public interface (would normally go in its own .h file): */
- #include <limits.h>
/* Status codes returned by punycode_encode() and punycode_decode(). */
enum punycode_status {
  punycode_success,    /* The conversion succeeded. */
  punycode_bad_input,  /* Input is invalid. */
  punycode_big_output, /* Output would exceed the space provided. */
  punycode_overflow    /* Input needs wider integers to process. */
};
/* punycode_uint is the unsigned integer type used for code points and   */
/* lengths.  It is unsigned int when that type can represent 2^26 - 1,   */
/* and unsigned long otherwise.  Requires UINT_MAX from <limits.h>.      */
#if UINT_MAX >= (1 << 26) - 1
typedef unsigned int punycode_uint;
#else
typedef unsigned long punycode_uint;
#endif
- enum punycode_status punycode_encode(
- punycode_uint input_length,
- const punycode_uint input[],
- const unsigned char case_flags[],
- punycode_uint *output_length,
- char output[] );
- /* punycode_encode() converts Unicode to Punycode. The input */
- /* is represented as an array of Unicode code points (not code */
- /* units; surrogate pairs are not allowed), and the output */
- /* will be represented as an array of ASCII code points. The */
- /* output string is *not* null-terminated; it will contain */
- /* zeros if and only if the input contains zeros. (Of course */
- /* the caller can leave room for a terminator and add one if */
- /* needed.) The input_length is the number of code points in */
- /* the input. The output_length is an in/out argument: the */
- /* caller passes in the maximum number of code points that it */
- /* can receive, and on successful return it will contain the */
- /* number of code points actually output. The case_flags array */
- /* holds input_length boolean values, where nonzero suggests that */
- /* the corresponding Unicode character be forced to uppercase */
- /* after being decoded (if possible), and zero suggests that */
- /* it be forced to lowercase (if possible). ASCII code points */
- /* are encoded literally, except that ASCII letters are forced */
- /* to uppercase or lowercase according to the corresponding */
- /* uppercase flags. If case_flags is a null pointer then ASCII */
- /* letters are left as they are, and other code points are */
- /* treated as if their uppercase flags were zero. The return */
- /* value can be any of the punycode_status values defined above */
- /* except punycode_bad_input; if not punycode_success, then */
- /* output_size and output might contain garbage. */
- enum punycode_status punycode_decode(
- punycode_uint input_length,
- const char input[],
- punycode_uint *output_length,
- punycode_uint output[],
- unsigned char case_flags[] );
- /* punycode_decode() converts Punycode to Unicode. The input is */
- /* represented as an array of ASCII code points, and the output */
- /* will be represented as an array of Unicode code points. The */
- /* input_length is the number of code points in the input. The */
- /* output_length is an in/out argument: the caller passes in */
- /* the maximum number of code points that it can receive, and */
- /* on successful return it will contain the actual number of */
- /* code points output. The case_flags array needs room for at */
- /* least output_length values, or it can be a null pointer if the */
- /* case information is not needed. A nonzero flag suggests that */
- /* the corresponding Unicode character be forced to uppercase */
- /* by the caller (if possible), while zero suggests that it be */
- /* forced to lowercase (if possible). ASCII code points are */
- /* output already in the proper case, but their flags will be set */
- /* appropriately so that applying the flags would be harmless. */
- /* The return value can be any of the punycode_status values */
- /* defined above; if not punycode_success, then output_length, */
- /* output, and case_flags might contain garbage. On success, the */
- /* decoder will never need to write an output_length greater than */
- /* input_length, because of how the encoding is defined. */
- #ifdef __cplusplus
- }
- #endif /* __cplusplus */
|