/*
 * The authors of this software are Rob Pike and Ken Thompson.
 *              Copyright (c) 2002 by Lucent Technologies.
 * Permission to use, copy, modify, and distribute this software for any
 * purpose without fee is hereby granted, provided that this entire notice
 * is included in all copies of any software which is or includes a copy
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <stdio.h>
#include "plan9.h"
#include "utf.h"

/*
 * Constants describing the UTF-8 byte layout used by the routines below,
 * which handle sequences of one to three bytes.
 *
 *      BitN    number of value bits held by the lead byte of an N-byte sequence
 *      Bitx    number of value bits held by each continuation byte
 *      TN      tag bits marking the lead byte of an N-byte sequence
 *      Tx      tag bits marking a continuation byte
 *      RuneN   largest value that fits in an N-byte sequence
 */
enum
{
        Bit1    = 7,
        Bitx    = 6,
        Bit2    = 5,
        Bit3    = 4,
        Bit4    = 3,

        /* each tag is 0xFF with the low BitN+1 bits cleared */
        T1      = ((1<<(Bit1+1))-1) ^ 0xFF,     /* 0000 0000 */
        Tx      = ((1<<(Bitx+1))-1) ^ 0xFF,     /* 1000 0000 */
        T2      = ((1<<(Bit2+1))-1) ^ 0xFF,     /* 1100 0000 */
        T3      = ((1<<(Bit3+1))-1) ^ 0xFF,     /* 1110 0000 */
        T4      = ((1<<(Bit4+1))-1) ^ 0xFF,     /* 1111 0000 */

        /* lead-byte bits plus Bitx bits per continuation byte */
        Rune1   = (1<<(Bit1+0*Bitx))-1,         /* 0000 0000 0111 1111 */
        Rune2   = (1<<(Bit2+1*Bitx))-1,         /* 0000 0111 1111 1111 */
        Rune3   = (1<<(Bit3+2*Bitx))-1,         /* 1111 1111 1111 1111 */

        Maskx   = (1<<Bitx)-1,                  /* 0011 1111 */
        /* non-zero bits under Testx after XOR with Tx mean "not a continuation byte" */
        Testx   = Maskx ^ 0xFF,                 /* 1100 0000 */

        /* value stored by chartorune when decoding fails */
        Bad     = Runeerror
};

/*
 * Decode the UTF-8 sequence starting at str into *rune.
 *
 * Returns the number of bytes consumed (1, 2 or 3).  A malformed
 * sequence, an overlong encoding, or a lead byte of T4 or above stores
 * Bad in *rune and consumes exactly one byte.
 *
 * The second byte is examined only when the first is not ASCII, and the
 * third only when the second is a valid continuation byte, so a
 * NUL-terminated string is never read past its terminator.
 */
int
chartorune(Rune *rune, const char *str)
{
        const uchar *p;
        int lead, cont1, cont2;
        long value;

        p = (const uchar*)str;

        /* 0000-007F: a single byte stands for itself */
        lead = p[0];
        if(lead < Tx) {
                *rune = lead;
                return 1;
        }

        /* every longer form needs a continuation byte in second place */
        cont1 = p[1] ^ Tx;
        if((cont1 & Testx) == 0) {
                if(lead < T3) {
                        /* 0080-07FF: T2 Tx; a lead below T2 is a stray continuation byte */
                        if(lead >= T2) {
                                value = ((lead << Bitx) | cont1) & Rune2;
                                /* reject values that would have fit in one byte */
                                if(value > Rune1) {
                                        *rune = value;
                                        return 2;
                                }
                        }
                } else {
                        /* 0800-FFFF: T3 Tx Tx */
                        cont2 = p[2] ^ Tx;
                        if((cont2 & Testx) == 0 && lead < T4) {
                                value = ((((lead << Bitx) | cont1) << Bitx) | cont2) & Rune3;
                                /* reject values that would have fit in two bytes */
                                if(value > Rune2) {
                                        *rune = value;
                                        return 3;
                                }
                        }
                }
        }

        /* anything that reaches here failed to decode */
        *rune = Bad;
        return 1;
}

/*
 * Encode *rune as UTF-8 into str and return the number of bytes
 * written (1, 2 or 3).  No terminating NUL is appended.
 */
int
runetochar(char *str, Rune *rune)
{
        long value;

        value = *rune;

        /* above 07FF: three bytes, T3 Tx Tx */
        if(value > Rune2) {
                str[0] = T3 |  (value >> 2*Bitx);
                str[1] = Tx | ((value >> 1*Bitx) & Maskx);
                str[2] = Tx |  (value & Maskx);
                return 3;
        }

        /* 0080-07FF: two bytes, T2 Tx */
        if(value > Rune1) {
                str[0] = T2 | (value >> 1*Bitx);
                str[1] = Tx | (value & Maskx);
                return 2;
        }

        /* 0000-007F: the byte is the value */
        str[0] = value;
        return 1;
}

/*
 * Return the number of bytes runetochar produces for the value c.
 * The encoding is written to a scratch buffer and discarded.
 */
int
runelen(long c)
{
        char scratch[10];
        Rune r = c;

        return runetochar(scratch, &r);
}

/*
 * Return the total number of UTF-8 bytes needed to encode the first
 * nrune runes of r, using the same size thresholds as runetochar.
 */
int
runenlen(Rune *r, int nrune)
{
        int total, value;

        total = 0;
        for(; nrune != 0; nrune--, r++) {
                value = *r;
                total += (value <= Rune1) ? 1 : (value <= Rune2) ? 2 : 3;
        }
        return total;
}

/*
 * Report whether the first n bytes of str are enough to hold one
 * complete UTF-8 sequence, judged from the lead byte alone.
 * Returns 1 if so, 0 otherwise (including n <= 0).
 */
int
fullrune(char *str, int n)
{
        int lead;

        if(n <= 0)
                return 0;

        lead = *(uchar*)str;
        if(lead < Tx)
                return 1;       /* single-byte sequence */
        if(n == 1)
                return 0;       /* multi-byte lead with nothing after it */

        /* two bytes suffice below T3; anything else needs at least three */
        return (lead < T3 || n > 2) ? 1 : 0;
}

/*
 * Upper-case the UTF-8 string s in place using toupperrune.
 *
 * Bytes that do not decode as UTF-8 are treated as Latin-1 characters
 * and re-encoded, so a byte in the range 80-FF becomes two bytes.
 *
 * Returns s on success.  Returns NULL, leaving s untouched, if s is
 * NULL or the scratch buffer cannot be allocated.
 *
 * NOTE(review): the result is copied back over s without a size check.
 * It can be longer than the input (Latin-1 fallback above, and possibly
 * case mappings whose encoded length differs), so the caller must
 * provide a buffer with room for the converted string.
 */
char *utfstrtoupper(char *s) {
	char *buf, *out;
	const char *in;
	int n;
	Rune r;

	if(s == NULL)
		return NULL;

	/* worst case: every input byte turns into a UTFmax-byte sequence */
	buf = malloc(UTFmax*strlen(s) + 1);
	if(buf == NULL)
		return NULL;

	in = s;
	out = buf;
	while(*in){
		/* in decoding error, assume latin1 */
		n = chartorune(&r, in);
		if(n == 1 && r == Runeerror)
			r = (uchar)*in;
		in += n;
		r = toupperrune(r);
		out += runetochar(out, &r);
	}
	*out = '\0';

	strcpy(s, buf);
	free(buf);
	return s;
}

/*
 * Return the number of runes in the NUL-terminated UTF-8 string s.
 * Bytes below Runeself count as one rune each; anything else is
 * measured with chartorune, so an undecodable byte counts as one rune.
 */
int
utflen(const char *s)
{
        long count;
        int lead;
        Rune scratch;

        count = 0;
        for(;;) {
                lead = *(uchar*)s;
                if(lead >= Runeself)
                        s += chartorune(&scratch, s);
                else if(lead == 0)
                        break;
                else
                        s++;
                count++;
        }
        return count;
}

/*
 * Decode the UTF-8 string s into the rune array r and terminate it
 * with a zero rune.  r must have room for every decoded rune plus the
 * terminator.  Returns r, or nil when s is nil.
 */
Rune*
strtorune(Rune *r, char *s)
{
        Rune *dst;

        if(s == nil)
                return nil;

        for(dst = r; *s; dst++)
                s += chartorune(dst, s);
        *dst = L'\0';
        return r;
}

/*
 * Encode the zero-terminated rune array r as a NUL-terminated UTF-8
 * string in buf.  buf must be large enough for the result.
 * Returns buf.
 */
char *runetostr(char *buf, Rune *r){
        char *out;

        out = buf;
        while(*r){
                out += runetochar(out, r);
                r++;
        }
        *out = '\0';
        return buf;
}


/*
 * Copy the runes of src at positions from..to (inclusive, counted in
 * runes, zero-based) into dst as a NUL-terminated UTF-8 string.
 *
 * The range is clamped to the runes actually present in src: a negative
 * from is treated as 0, a to beyond the last rune is treated as the
 * last rune, and an empty or inverted range yields an empty string.
 * dst must have room for the copied bytes plus the terminator.
 *
 * Returns dst.  If the temporary rune buffer cannot be allocated, dst
 * is set to the empty string and NULL is returned.
 */
char *utfcpystr(char *dst, const char *src, int from, int to) {
	char *out = dst;
	Rune *runes, *p;
	int len, count;

	len = utflen(src);
	runes = calloc(len+1, sizeof *runes);
	if (runes == NULL) {
		*dst = '\0';
		return NULL;
	}
	strtorune(runes, (char*)src);

	/* keep the start inside the decoded array */
	if (from < 0)
		from = 0;
	if (from > len)
		from = len;

	/* never read the terminator or anything beyond it */
	if (to > len-1)
		to = len-1;
	count = (to < from) ? 0 : to-from+1;

	for (p = runes+from; count > 0; count--)
		out += runetochar(out, p++);

	*out = '\0';

	free(runes);
	return dst;
}

/*
 * Replace the rune at position pos (zero-based, counted in runes) of the
 * UTF-8 string str with the first rune decoded from this, rewriting str
 * in place.
 *
 * A pos outside the string leaves str unchanged.  If this is the empty
 * string the decoded rune is zero, which truncates str at pos.
 *
 * Returns str, or NULL (with str unchanged) if the temporary rune
 * buffer cannot be allocated.
 *
 * NOTE(review): the replacement may encode to more bytes than the rune
 * it replaces, so the caller must make sure str has room to grow.
 */
char *utfreplacechar(char *str, const char *this, int pos) {
	Rune *runes;
	int len;

	len = utflen(str);

	/* writing at or past the terminator would corrupt the rune array */
	if (pos < 0 || pos >= len)
		return str;

	runes = calloc(len+1, sizeof *runes);
	if (runes == NULL)
		return NULL;

	strtorune(runes, str);
	chartorune(runes+pos, this);

	runetostr(str, runes);

	free(runes);
	return str;
}

/* Return the value (0-15) of the hexadecimal digit c, or -1 if c is not one. */
static int
urlhexdigit(int c)
{
        if(c >= '0' && c <= '9')
                return c - '0';
        if(c >= 'A' && c <= 'F')
                return c - 'A' + 10;
        if(c >= 'a' && c <= 'f')
                return c - 'a' + 10;
        return -1;
}

/*
 * go from url with escaped utf to utf
 *
 * Decodes s in place and returns it.  Two escape forms are recognised:
 *      %HH     one byte given by two hex digits
 *      %uHHHH  non-standard: one rune given by four hex digits,
 *              written out as UTF-8
 * A decoded zero byte is dropped.  Decoding stops at the first
 * malformed escape and the rest of the input is discarded.
 *
 * The unescaped bytes are then re-encoded as UTF-8; bytes that do not
 * decode are taken to be Latin-1.
 *
 * Returns NULL, leaving s untouched, if s is NULL or a scratch buffer
 * cannot be allocated.
 *
 * NOTE(review): the result is copied back over s without a size check.
 * Raw (unescaped) bytes in 80-FF that are not valid UTF-8 grow from one
 * byte to two, so the output can be longer than the input; the caller
 * must provide a buffer with room for that.
 */
char *urlunesc(char *s)
{
        char *t, *u, *v, *orig;
        Rune r, ru;
        int c, n;
        int d1, d2, d3, d4;

        if(s == NULL)
                return NULL;
        orig = s;

        /* unescape; the output is never longer than the input */
        u = malloc(strlen(s)+1);
        if(u == NULL)
                return NULL;
        for(t = u; (c = *s); s++){
                if(c == '%'){
                        /*
                         * The || chains stop at the first bad digit, so
                         * no byte after the terminating NUL is examined.
                         */
                        if(s[1] != 'u'){
                                if((d1 = urlhexdigit(s[1])) < 0
                                || (d2 = urlhexdigit(s[2])) < 0)
                                        break;
                                s += 2;
                                c = (d1<<4) + d2;
                        }else{
                                /* non-standard %uHHHH coding */
                                if((d1 = urlhexdigit(s[2])) < 0
                                || (d2 = urlhexdigit(s[3])) < 0
                                || (d3 = urlhexdigit(s[4])) < 0
                                || (d4 = urlhexdigit(s[5])) < 0)
                                        break;
                                ru = (((d1<<4) + d2) << 8) | ((d3<<4) + d4);
                                t += runetochar(t, &ru);
                                s += 5;
                                c = 0;  /* already written */
                        }
                }
                if(c)
                        *t++ = c;
        }
        *t = '\0';

        /* convert to valid utf */
        v = malloc(UTFmax*strlen(u) + 1);
        if(v == NULL){
                free(u);
                return NULL;
        }
        s = u;
        t = v;
        while(*s){
                /* in decoding error, assume latin1 */
                if((n=chartorune(&r, s)) == 1 && r == Runeerror)
                        r = (uchar)*s;
                s += n;
                t += runetochar(t, &r);
        }
        *t = '\0';

        strcpy(orig, v);

        free(u);
        free(v);

        return orig;
}

/*
 * Write a URL-escaped copy of the UTF-8 string source to out,
 * followed by a terminating NUL.  out must be large enough.
 *
 * Each decoded rune is handled as follows:
 *   - ASCII letters, digits and - _ . ~ are copied unchanged;
 *   - runes for which isspacerune is true, except newline (10),
 *     become "%20";
 *   - other runes up to FF become %HH;
 *   - other runes up to FFFF become the non-standard %uHHHH;
 *   - larger runes produce no output.
 */
void urlescape(char *out, const char *source) {
	static const char *hexdigits = "0123456789ABCDEF";
	const char *in;
	Rune r;
	int hi;

	for (in = source; *in; ) {
		in += chartorune(&r, in);

		/* unreserved characters pass through */
		if ((r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9')
		    || r == '-' || r == '_' || r == '.' || r == '~') {
			*out++ = (char) r;
			continue;
		}

		/* whitespace other than newline collapses to an escaped space */
		if (isspacerune(r) && (r != 10)) {
			*out++ = '%';
			*out++ = '2';
			*out++ = '0';
			continue;
		}

		if (r <= 0xff) {
			*out++ = '%';
			*out++ = hexdigits[r >> 4];
			*out++ = hexdigits[r & 0x0f];
			continue;
		}

		if (r <= 0xffff) {
			hi = r >> 8;
			*out++ = '%';
			*out++ = 'u';
			*out++ = hexdigits[(hi & 0xff) >> 4];
			*out++ = hexdigits[hi & 0x0f];
			*out++ = hexdigits[(r & 0xff) >> 4];
			*out++ = hexdigits[r & 0x0f];
		}
	}
	*out = 0;
}

