package json

import (
	"unicode/utf8"
)

// Code point boundaries, sentinel values and escape lengths used while
// decoding \uXXXX escape sequences.
const (
	// UTF-16 surrogate code unit boundaries.
	highSurrogateOffset = 0xD800 // first high surrogate
	lowSurrogateOffset  = 0xDC00 // first low surrogate
	surrogateEnd        = 0xDFFF // last low surrogate

	// Unicode plane boundaries.
	basicMultilingualPlaneOffset = 0xFFFF  // last code point of the BMP
	supplementalPlanesOffset     = 0x10000 // first code point beyond the BMP

	// badHex is the sentinel compared against h2i results to detect a byte
	// that is not a hexadecimal digit.
	badHex = -1

	// Lengths, in bytes, of the textual escape forms.
	singleUnicodeEscapeLen = 6  // \uXXXX
	surrogatePairLen       = 12 // \uXXXX\uXXXX
)

// hexLookupTable maps every byte value to the number represented by that
// hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F'), or to -1 (the value of
// badHex) when the byte is not a hexadecimal digit.
//
// The table is filled programmatically instead of with a sparse array literal:
// entries omitted from a literal default to 0, which would make bytes such as
// ' ' or '/' indistinguishable from the digit '0'.
var hexLookupTable = func() [256]int {
	var table [256]int

	// start from "not a hex digit" for all 256 byte values
	for i := range table {
		table[i] = -1
	}

	for i := 0; i < 10; i++ {
		table['0'+i] = i
	}

	// letters a-f carry the values 10-15 in both cases
	for i := 0; i < 6; i++ {
		table['a'+i] = 10 + i
		table['A'+i] = 10 + i
	}

	return table
}()

// h2i returns the hexLookupTable entry for c, i.e. the numeric value the
// table associates with that byte.
func h2i(c byte) int {
	value := hexLookupTable[c]
	return value
}

// Unescape decodes the backslash escape sequences found in input and writes
// the decoded bytes into output.
//
// output serves as the destination buffer. If its capacity is smaller than
// len(input) a new buffer of len(input) bytes is allocated; if it has the
// capacity but a shorter length it is extended to len(input), because the
// decoded bytes are stored by index.
//
// It returns the prefix of the buffer that holds the decoded bytes. When an
// escape sequence cannot be processed it returns a nil slice together with the
// error reported by processEscapedUTF8.
func Unescape(input, output []byte) ([]byte, error) {
	inputLen := len(input)

	switch {
	case cap(output) < inputLen:
		output = make([]byte, inputLen)
	case len(output) < inputLen:
		// enough capacity but too short to be indexed: extend in place
		output = output[:inputLen]
	}

	inPos, outPos := 0, 0

	for inPos < inputLen {
		c := input[inPos]
		if c != backSlash {
			// ordinary byte: copy through unchanged
			output[outPos] = c
			inPos++
			outPos++
			continue
		}

		// escape sequence: the helper reports how many bytes it read and wrote
		inLen, outLen, err := processEscapedUTF8(input[inPos:], output[outPos:])
		if err != nil {
			return nil, err
		}
		inPos += inLen
		outPos += outLen
	}

	return output[:outPos], nil
}

// isSurrogatePair reports whether r lies in the UTF-16 surrogate range, from
// highSurrogateOffset through surrogateEnd inclusive.
//
// Despite the name it examines a single code unit: the result is true for
// either half of a surrogate pair, high or low.
func isSurrogatePair(r rune) bool {
	if r < highSurrogateOffset {
		return false
	}
	return r <= surrogateEnd
}

// isHighSurrogate reports whether r is a high (leading) surrogate, i.e. lies
// between highSurrogateOffset (U+D800) and U+DBFF inclusive.
func isHighSurrogate(r rune) bool {
	const lastHighSurrogate = 0xDBFF
	return highSurrogateOffset <= r && r <= lastHighSurrogate
}

// isLowSurrogate reports whether r is a low (trailing) surrogate, i.e. lies
// between lowSurrogateOffset (U+DC00) and surrogateEnd (U+DFFF) inclusive.
func isLowSurrogate(r rune) bool {
	if r < lowSurrogateOffset {
		return false
	}
	return r <= surrogateEnd
}

// combineSurrogates reconstructs the supplementary-plane code point encoded
// by a high and a low surrogate.
//
// The high surrogate is expected in the range U+D800 to U+DBFF and the low
// surrogate in the range U+DC00 to U+DFFF; the arguments are not validated
// here.
//
// The result is computed as:
// (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000
func combineSurrogates(high, low rune) rune {
	upperBits := (high - highSurrogateOffset) << 10 // top ten bits of the offset
	lowerBits := low - lowSurrogateOffset           // bottom ten bits of the offset
	return upperBits + lowerBits + supplementalPlanesOffset
}

// decodeSingleUnicodeEscape decodes one \uXXXX escape sequence located at the
// start of b into the 16-bit value it spells out.
//
// It returns (utf8.RuneError, false) when b holds fewer than six bytes, does
// not begin with the two bytes `\u`, or when h2i reports badHex for any of the
// four digit positions. The decoded value may be a surrogate; pairing
// surrogates is left to the caller.
func decodeSingleUnicodeEscape(b []byte) (rune, bool) {
	// The prefix is checked here because callers hand over the second half of
	// a surrogate pair without inspecting it first.
	if len(b) < 6 || b[0] != '\\' || b[1] != 'u' {
		return utf8.RuneError, false
	}

	// convert the four hex digits to their numeric values
	h1, h2, h3, h4 := h2i(b[2]), h2i(b[3]), h2i(b[4]), h2i(b[5])
	if h1 == badHex || h2 == badHex || h3 == badHex || h4 == badHex {
		return utf8.RuneError, false
	}

	return rune(h1<<12 + h2<<8 + h3<<4 + h4), true
}

// decodeUnicodeEscape decodes the Unicode escape at the start of b, which is
// either a single \uXXXX sequence or a surrogate pair written as two
// consecutive sequences.
//
// On success it returns the decoded rune together with the number of input
// bytes it occupies: 6 for a single escape, surrogatePairLen for a pair.
// On failure it returns (utf8.RuneError, -1). Failure covers an undecodable
// first escape, a first value that is a surrogate but not a high one, a high
// surrogate without enough bytes after it, and a second escape that is
// undecodable or not a low surrogate.
func decodeUnicodeEscape(b []byte) (rune, int) {
	first, ok := decodeSingleUnicodeEscape(b)
	if !ok {
		return utf8.RuneError, -1
	}

	switch {
	case first <= basicMultilingualPlaneOffset && !isSurrogatePair(first):
		// ordinary BMP code point: nothing more to read
		return first, 6
	case !isHighSurrogate(first):
		// a surrogate that cannot start a pair
		return utf8.RuneError, -1
	case len(b) < surrogatePairLen:
		// high surrogate with no room left for its partner
		return utf8.RuneError, -1
	}

	// the high surrogate must be followed by a low surrogate escape
	second, ok := decodeSingleUnicodeEscape(b[singleUnicodeEscapeLen:])
	if !ok || !isLowSurrogate(second) {
		return utf8.RuneError, -1
	}

	return combineSurrogates(first, second), surrogatePairLen
}

var escapeByteSet = [256]byte{
	'"':  doubleQuote,
	'\\': backSlash,
	'/':  slash,
	'b':  backSpace,
	'f':  formFeed,
	'n':  newLine,
	'r':  carriageReturn,
	't':  tab,
}

// Unquote strips the border byte from both ends of s and unescapes what lies
// between them, returning the result as a string.
//
// The work is delegated to unquoteBytes; the boolean is its success flag. On
// failure unquoteBytes yields a nil slice, so the returned string is empty.
func Unquote(s []byte, border byte) (string, bool) {
	unquoted, ok := unquoteBytes(s, border)
	return string(unquoted), ok
}

// unquoteBytes strips the surrounding border bytes from s and decodes the
// escape sequences in between.
//
// s must begin and end with border. The enclosed content is rejected when it
// contains an unescaped border byte, a byte below 0x20, invalid UTF-8, a
// trailing backslash, or an escape that neither escapeByteSet nor
// decodeUnicodeEscape accepts.
//
// When the content needs no decoding the returned slice is a sub-slice of s;
// otherwise it is a newly allocated buffer. On failure it returns (nil, false).
func unquoteBytes(s []byte, border byte) ([]byte, bool) {
	if len(s) < 2 || s[0] != border || s[len(s)-1] != border {
		return nil, false
	}

	s = s[1 : len(s)-1]

	// Fast path: scan for the first byte that needs special treatment. If
	// there is none the content can be returned without copying.
	r := 0
	for r < len(s) {
		c := s[r]

		if c == backSlash || c == border || c < 0x20 {
			break
		}

		if c < utf8.RuneSelf {
			r++
			continue
		}

		rr, size := utf8.DecodeRune(s[r:])
		if rr == utf8.RuneError && size == 1 {
			break
		}

		r += size
	}

	if r == len(s) {
		return s, true
	}

	// Slow path: copy the clean prefix, then decode the remainder byte by byte.
	utfDoubleMax := utf8.UTFMax * 2
	b := make([]byte, len(s)+utfDoubleMax)
	w := copy(b, s[0:r])

	for r < len(s) {
		// keep room for at least one more encoded rune
		if w >= len(b)-utf8.UTFMax {
			nb := make([]byte, utfDoubleMax+(2*len(b)))
			copy(nb, b)
			b = nb
		}

		c := s[r]
		switch {
		case c == backSlash:
			r++
			if r >= len(s) {
				// backslash with nothing after it
				return nil, false
			}

			if s[r] == 'u' {
				rr, res := decodeUnicodeEscape(s[r-1:])
				if res < 0 {
					return nil, false
				}

				w += utf8.EncodeRune(b[w:], rr)
				// res is counted from the backslash while r already sits on
				// the 'u': skip the rest of the escape, which is 5 bytes for
				// a single escape and 11 for a surrogate pair.
				r += res - 1
			} else {
				decode := escapeByteSet[s[r]]
				if decode == 0 {
					return nil, false
				}

				if decode == doubleQuote || decode == backSlash || decode == slash {
					decode = s[r]
				}

				b[w] = decode
				r++
				w++
			}
		case c == border || c < 0x20:
			// unescaped border or control byte inside the content
			return nil, false
		case c < utf8.RuneSelf:
			b[w] = c
			r++
			w++
		default:
			rr, size := utf8.DecodeRune(s[r:])
			if rr == utf8.RuneError && size == 1 {
				return nil, false
			}

			r += size
			w += utf8.EncodeRune(b[w:], rr)
		}
	}

	return b[:w], true
}

// processEscapedUTF8 decodes the single escape sequence at the start of in
// and writes its UTF-8 form to the start of out.
//
// A \u escape is decoded with decodeUnicodeEscape and encoded with
// utf8.EncodeRune; any other escape is looked up in escapeByteSet and yields
// exactly one byte.
//
// It returns the number of bytes consumed from in and the number of bytes
// written to out. If in is shorter than two bytes, does not start with a
// backslash, or holds an escape that cannot be decoded, it returns
// (-1, -1, errInvalidEscapeSequence).
func processEscapedUTF8(in, out []byte) (int, int, error) {
	if len(in) < 2 || in[0] != backSlash {
		return -1, -1, errInvalidEscapeSequence
	}

	if in[1] == 'u' {
		decoded, consumed := decodeUnicodeEscape(in)
		if consumed == -1 {
			return -1, -1, errInvalidEscapeSequence
		}

		written := utf8.EncodeRune(out, decoded)
		return consumed, written, nil
	}

	// two-character escape: backslash plus one selector byte
	replacement := escapeByteSet[in[1]]
	if replacement == 0 {
		return -1, -1, errInvalidEscapeSequence
	}

	out[0] = replacement
	return 2, 1, nil
}