learnlytics-go/templ/safehtml/style.go

// Adapted from https://raw.githubusercontent.com/google/safehtml/3c4cd5b5d8c9a6c5882fba099979e9f50b65c876/style.go

// Copyright (c) 2017 The Go Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

package safehtml

import (
	"bytes"
	"fmt"
	"net/url"
	"regexp"
	"strings"
)

// SanitizeCSS sanitizes a CSS property name together with its value.
//
// The name is normalized with SanitizeCSSProperty first. If the name is
// rejected, both results are the innocuous placeholders and the value is not
// inspected at all. Otherwise the normalized name is returned along with the
// result of SanitizeCSSValue for that name.
func SanitizeCSS(property, value string) (string, string) {
	name := SanitizeCSSProperty(property)
	if name != InnocuousPropertyName {
		return name, SanitizeCSSValue(name, value)
	}
	return InnocuousPropertyName, InnocuousPropertyValue
}

// SanitizeCSSValue sanitizes value for the given CSS property.
//
// The property is looked up verbatim in cssPropertyNameToValueSanitizer; when
// there is no dedicated entry the value is checked by sanitizeRegular. The
// result is either the accepted value or InnocuousPropertyValue.
func SanitizeCSSValue(property, value string) string {
	sanitize, found := cssPropertyNameToValueSanitizer[property]
	if !found {
		sanitize = sanitizeRegular
	}
	return sanitize(value)
}

// SanitizeCSSProperty validates a CSS property name.
//
// A name that matches identifierPattern is returned in lower case; any other
// name is replaced by InnocuousPropertyName.
func SanitizeCSSProperty(property string) string {
	if identifierPattern.MatchString(property) {
		return strings.ToLower(property)
	}
	return InnocuousPropertyName
}

// identifierPattern matches a subset of valid <ident-token> values defined in
// https://www.w3.org/TR/css-syntax-3/#ident-token-diagram: a non-empty run of
// ASCII letters and hyphens. This pattern matches all generic family name
// keywords defined in https://drafts.csswg.org/css-fonts-3/#family-name-value.
//
// SanitizeCSSProperty uses it to validate property names, so names that
// contain digits, underscores or non-ASCII runes are rejected.
var identifierPattern = regexp.MustCompile(`^[-a-zA-Z]+$`)

// cssPropertyNameToValueSanitizer maps a lower-case CSS property name to the
// function that sanitizes values of that property. SanitizeCSSValue consults
// this table and falls back to sanitizeRegular for names that are not listed,
// so the sanitizeRegular entries below spell out the default rather than
// change behavior.
var cssPropertyNameToValueSanitizer = map[string]func(string) string{
	"background-image":    sanitizeBackgroundImage,
	"font-family":         sanitizeFontFamily,
	"display":             sanitizeEnum,
	"background-color":    sanitizeRegular,
	"background-position": sanitizeRegular,
	"background-repeat":   sanitizeRegular,
	"background-size":     sanitizeRegular,
	"color":               sanitizeRegular,
	"height":              sanitizeRegular,
	"width":               sanitizeRegular,
	"left":                sanitizeRegular,
	"right":               sanitizeRegular,
	"top":                 sanitizeRegular,
	"bottom":              sanitizeRegular,
	"font-weight":         sanitizeRegular,
	"padding":             sanitizeRegular,
	"z-index":             sanitizeRegular,
}

// validURLPrefixes lists the accepted openings of a CSS url(...) item:
// double-quoted, single-quoted and unquoted, in that order. Entry i pairs with
// validURLSuffixes[i]; the unquoted form comes last because its prefix is also
// a prefix of the two quoted forms.
var validURLPrefixes = []string{
	`url("`,
	`url('`,
	`url(`,
}

// validURLSuffixes lists the closings that match validURLPrefixes, in the same
// order.
var validURLSuffixes = []string{
	`")`,
	`')`,
	`)`,
}

// sanitizeBackgroundImage validates the value of a background-image property.
//
// The value is split on commas and every item must be a single url(...)
// reference whose argument is well formed (see cssURLTarget) and whose address
// is accepted by urlIsSafe. When every item passes, v is returned unchanged;
// otherwise the result is InnocuousPropertyValue. Values that are not url(...)
// references, the empty string included, are rejected.
func sanitizeBackgroundImage(v string) string {
	// Check for <> as per https://github.com/google/safehtml/blob/be23134998433fcf0135dda53593fc8f8bf4df7c/style.go#L87C2-L89C3
	if strings.ContainsAny(v, "<>") {
		return InnocuousPropertyValue
	}
	for _, item := range strings.Split(v, ",") {
		target, ok := cssURLTarget(strings.TrimSpace(item))
		if !ok || !urlIsSafe(target) {
			return InnocuousPropertyValue
		}
	}
	return v
}

// cssURLTarget extracts the address from one url(...) item, reporting false
// when the item is not exactly one well-formed url(...) reference.
//
// Checking only how the item starts and ends is not enough: an item such as
//
//	url("/a");color:red;x:url("b")
//
// starts with `url("` and ends with `")` but closes the first reference early
// and smuggles further declarations past the sanitizer. The argument therefore
// must not contain the character that would terminate it:
//   - quoted form: no occurrence of the enclosing quote, no backslash (which
//     could escape the closing quote) and no line break;
//   - unquoted form: no quotes, parentheses, backslashes or whitespace, which
//     CSS does not allow inside an unquoted URL.
//
// Spaces between the parentheses and the argument are tolerated, including
// around a quoted argument such as `url( "a.png" )`.
func cssURLTarget(item string) (string, bool) {
	for i, prefix := range validURLPrefixes {
		suffix := validURLSuffixes[i]
		// The length check stops prefix and suffix from sharing a quote, as
		// they would in `url(")`.
		if len(item) < len(prefix)+len(suffix) ||
			!strings.HasPrefix(item, prefix) || !strings.HasSuffix(item, suffix) {
			continue
		}
		target := item[len(prefix) : len(item)-len(suffix)]
		// Every prefix is "url(" optionally followed by the opening quote.
		quote := strings.TrimPrefix(prefix, "url(")
		if quote == "" {
			target = strings.Trim(target, " ")
			if n := len(target); n >= 2 && (target[0] == '"' || target[0] == '\'') && target[n-1] == target[0] {
				quote, target = target[:1], target[1:n-1]
			}
		}
		forbidden := "\"'()\\ \t\n\r\f"
		if quote != "" {
			forbidden = quote + "\\\n\r\f"
		}
		if strings.ContainsAny(target, forbidden) {
			return "", false
		}
		return target, true
	}
	return "", false
}

// urlIsSafe reports whether s may be used as a URL.
//
// Strings that net/url cannot parse are unsafe. URLs without a scheme
// (relative references) are safe. URLs with a scheme are safe only when the
// scheme is http, https or mailto, compared case-insensitively.
func urlIsSafe(s string) bool {
	parsed, err := url.Parse(s)
	switch {
	case err != nil:
		return false
	case !parsed.IsAbs():
		return true
	}
	for _, allowed := range [...]string{"http", "https", "mailto"} {
		if strings.EqualFold(parsed.Scheme, allowed) {
			return true
		}
	}
	return false
}

// genericFontFamilyName matches an unquoted font family name: an ASCII letter
// followed by one or more ASCII letters, spaces or hyphens, so a match is at
// least two characters long.
var genericFontFamilyName = regexp.MustCompile(`^[a-zA-Z][- a-zA-Z]+$`)

// sanitizeFontFamily validates the value of a font-family property.
//
// The value is split on commas and every entry, after trimming surrounding
// whitespace, must be either
//   - a double-quoted name: at least the two enclosing quotes, with no further
//     double quote, backslash or line break in between, or
//   - an unquoted name that matches genericFontFamilyName.
//
// When every entry passes, s is returned unchanged; otherwise the result is
// InnocuousPropertyValue.
//
// The interior of a quoted name has to be checked: looking only at the first
// and last byte would accept a lone `"` (which is both) and values such as
// `"";color:red;x:""` that close the string early and append declarations.
// Backslashes are rejected because they could escape the closing quote.
func sanitizeFontFamily(s string) string {
	for _, f := range strings.Split(s, ",") {
		f = strings.TrimSpace(f)
		if strings.HasPrefix(f, `"`) {
			if len(f) < 2 || !strings.HasSuffix(f, `"`) ||
				strings.ContainsAny(f[1:len(f)-1], "\"\\\n\r\f") {
				return InnocuousPropertyValue
			}
			continue
		}
		if !genericFontFamilyName.MatchString(f) {
			return InnocuousPropertyValue
		}
	}
	return s
}

// sanitizeEnum returns s unchanged when it matches
// safeEnumPropertyValuePattern, and InnocuousPropertyValue otherwise.
func sanitizeEnum(s string) string {
	if safeEnumPropertyValuePattern.MatchString(s) {
		return s
	}
	return InnocuousPropertyValue
}

// sanitizeRegular returns s unchanged when it matches
// safeRegularPropertyValuePattern, and InnocuousPropertyValue otherwise. It is
// the sanitizer SanitizeCSSValue applies to properties without a dedicated one.
func sanitizeRegular(s string) string {
	if safeRegularPropertyValuePattern.MatchString(s) {
		return s
	}
	return InnocuousPropertyValue
}

// InnocuousPropertyName is an innocuous property name generated by a sanitizer when its input is unsafe.
const InnocuousPropertyName = "zTemplUnsafeCSSPropertyName"

// InnocuousPropertyValue is an innocuous property value generated by a sanitizer when its input is unsafe.
const InnocuousPropertyValue = "zTemplUnsafeCSSPropertyValue"

// safeRegularPropertyValuePattern matches strings that are safe to use as property values.
// Specifically, it matches strings where every '*' or '/' is followed by end-of-text or a safe rune
// (i.e. an ASCII alphanumeric or one of '+', '-', '.', '!', '#', '%', '_', space or tab). This regex
// ensures that the following are disallowed:
//   - "/*" and "*/", which are CSS comment markers.
//   - "//", even though this is not a comment marker in the CSS specification. Disallowing
//     this string minimizes the chance that browser peculiarities or parsing bugs will allow
//     sanitization to be bypassed.
//   - '(' and ')', which can be used to call functions.
//   - ',', since it can be used to inject extra values into a property.
//   - Runes which could be matched on CSS error recovery of a previously malformed token, such as '@'
//     and ':'. See http://www.w3.org/TR/css3-syntax/#error-handling.
//
// The hyphen in the character class is escaped on purpose: written as `+-.` it would be the range
// U+002B..U+002E, which contains ',' and would let commas through.
var safeRegularPropertyValuePattern = regexp.MustCompile(`^(?:[*/]?(?:[0-9a-zA-Z+\-.!#%_ \t]|$))*$`)

// safeEnumPropertyValuePattern matches strings that are safe to use as enumerated property values.
// Specifically, it matches strings that contain only ASCII alphabetic and '-' runes. The empty
// string also matches.
var safeEnumPropertyValuePattern = regexp.MustCompile(`^[a-zA-Z-]*$`)

// SanitizeStyleValue returns s escaped for use between double quotes as a CSS
// <string-token> (https://www.w3.org/TR/css-syntax-3/#string-token-diagram).
//
// Besides the escapes a <string-token> requires, control runes are escaped as
// well to reduce the chance of tripping browser-specific bugs. Escapes are
// written as a backslash followed by six upper-case hex digits.
// Taken from cssEscapeString in safehtml package.
func SanitizeStyleValue(s string) string {
	var out bytes.Buffer
	out.Grow(len(s))
	for _, r := range s {
		if r == '\u0000' {
			// NULL becomes U+FFFD, as in
			// https://www.w3.org/TR/css-syntax-3/#input-preprocessing, in case
			// the user agent does not do the replacement itself.
			out.WriteString("\uFFFD")
			continue
		}
		var escape bool
		switch r {
		case '<': // Would allow `</style>` to close a style element.
			escape = true
		case '"', '\\': // Must be escaped inside a <string-token>.
			escape = true
		case '\u007F', '\u2028', '\u2029': // DEL and the Unicode newline characters.
			escape = true
		default:
			// C0 control codes (line feed included) and C1 control codes.
			escape = r <= '\u001F' || (r >= '\u0080' && r <= '\u009F')
		}
		if !escape {
			out.WriteRune(r)
			continue
		}
		// See CSS escape sequence syntax at https://www.w3.org/TR/css-syntax-3/#escape-diagram.
		fmt.Fprintf(&out, "\\%06X", r)
	}
	return out.String()
}
Changed: DB Params 2025-03-20 12:35:13 +01:00			`// Adapted from https://raw.githubusercontent.com/google/safehtml/3c4cd5b5d8c9a6c5882fba099979e9f50b65c876/style.go`

			`// Copyright (c) 2017 The Go Authors. All rights reserved.`
			`//`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE file or at`
			`// https://developers.google.com/open-source/licenses/bsd`

			`package safehtml`

			`import (`
			`"bytes"`
			`"fmt"`
			`"net/url"`
			`"regexp"`
			`"strings"`
			`)`

			`// SanitizeCSS attempts to sanitize CSS properties.`
			`func SanitizeCSS(property, value string) (string, string) {`
			`property = SanitizeCSSProperty(property)`
			`if property == InnocuousPropertyName {`
			`return InnocuousPropertyName, InnocuousPropertyValue`
			`}`
			`return property, SanitizeCSSValue(property, value)`
			`}`

			`func SanitizeCSSValue(property, value string) string {`
			`if sanitizer, ok := cssPropertyNameToValueSanitizer[property]; ok {`
			`return sanitizer(value)`
			`}`
			`return sanitizeRegular(value)`
			`}`

			`func SanitizeCSSProperty(property string) string {`
			`if !identifierPattern.MatchString(property) {`
			`return InnocuousPropertyName`
			`}`
			`return strings.ToLower(property)`
			`}`

			`// identifierPattern matches a subset of valid <ident-token> values defined in`
			`// https://www.w3.org/TR/css-syntax-3/#ident-token-diagram. This pattern matches all generic family name`
			`// keywords defined in https://drafts.csswg.org/css-fonts-3/#family-name-value.`
			var identifierPattern = regexp.MustCompile(`^[-a-zA-Z]+$`)

			`var cssPropertyNameToValueSanitizer = map[string]func(string) string{`
			`"background-image": sanitizeBackgroundImage,`
			`"font-family": sanitizeFontFamily,`
			`"display": sanitizeEnum,`
			`"background-color": sanitizeRegular,`
			`"background-position": sanitizeRegular,`
			`"background-repeat": sanitizeRegular,`
			`"background-size": sanitizeRegular,`
			`"color": sanitizeRegular,`
			`"height": sanitizeRegular,`
			`"width": sanitizeRegular,`
			`"left": sanitizeRegular,`
			`"right": sanitizeRegular,`
			`"top": sanitizeRegular,`
			`"bottom": sanitizeRegular,`
			`"font-weight": sanitizeRegular,`
			`"padding": sanitizeRegular,`
			`"z-index": sanitizeRegular,`
			`}`

			`var validURLPrefixes = []string{`
			`url("`,
			`url('`,
			`url(`,
			`}`

			`var validURLSuffixes = []string{`
			`")`,
			`')`,
			`)`,
			`}`

			`func sanitizeBackgroundImage(v string) string {`
			`// Check for <> as per https://github.com/google/safehtml/blob/be23134998433fcf0135dda53593fc8f8bf4df7c/style.go#L87C2-L89C3`
			`if strings.ContainsAny(v, "<>") {`
			`return InnocuousPropertyValue`
			`}`
			`for _, u := range strings.Split(v, ",") {`
			`u = strings.TrimSpace(u)`
			`var found bool`
			`for i, prefix := range validURLPrefixes {`
			`if strings.HasPrefix(u, prefix) && strings.HasSuffix(u, validURLSuffixes[i]) {`
			`found = true`
			`u = strings.TrimPrefix(u, validURLPrefixes[i])`
			`u = strings.TrimSuffix(u, validURLSuffixes[i])`
			`break`
			`}`
			`}`
			`if !found \|\| !urlIsSafe(u) {`
			`return InnocuousPropertyValue`
			`}`
			`}`
			`return v`
			`}`

			`func urlIsSafe(s string) bool {`
			`u, err := url.Parse(s)`
			`if err != nil {`
			`return false`
			`}`
			`if u.IsAbs() {`
			`if strings.EqualFold(u.Scheme, "http") \|\| strings.EqualFold(u.Scheme, "https") \|\| strings.EqualFold(u.Scheme, "mailto") {`
			`return true`
			`}`
			`return false`
			`}`
			`return true`
			`}`

			var genericFontFamilyName = regexp.MustCompile(`^[a-zA-Z][- a-zA-Z]+$`)

			`func sanitizeFontFamily(s string) string {`
			`for _, f := range strings.Split(s, ",") {`
			`f = strings.TrimSpace(f)`
			if strings.HasPrefix(f, `"`) {
			if !strings.HasSuffix(f, `"`) {
			`return InnocuousPropertyValue`
			`}`
			`continue`
			`}`
			`if !genericFontFamilyName.MatchString(f) {`
			`return InnocuousPropertyValue`
			`}`
			`}`
			`return s`
			`}`

			`func sanitizeEnum(s string) string {`
			`if !safeEnumPropertyValuePattern.MatchString(s) {`
			`return InnocuousPropertyValue`
			`}`
			`return s`
			`}`

			`func sanitizeRegular(s string) string {`
			`if !safeRegularPropertyValuePattern.MatchString(s) {`
			`return InnocuousPropertyValue`
			`}`
			`return s`
			`}`

			`// InnocuousPropertyName is an innocuous property generated by a sanitizer when its input is unsafe.`
			`const InnocuousPropertyName = "zTemplUnsafeCSSPropertyName"`

			`// InnocuousPropertyValue is an innocuous property generated by a sanitizer when its input is unsafe.`
			`const InnocuousPropertyValue = "zTemplUnsafeCSSPropertyValue"`

			`// safeRegularPropertyValuePattern matches strings that are safe to use as property values.`
			`// Specifically, it matches string where every '*' or '/' is followed by end-of-text or a safe rune`
			`// (i.e. alphanumerics or runes in the set [+-.!#%_ \t]). This regex ensures that the following`
			`// are disallowed:`
			`// - "/*" and "*/", which are CSS comment markers.`
			`// - "//", even though this is not a comment marker in the CSS specification. Disallowing`
			`// this string minimizes the chance that browser peculiarities or parsing bugs will allow`
			`// sanitization to be bypassed.`
			`// - '(' and ')', which can be used to call functions.`
			`// - ',', since it can be used to inject extra values into a property.`
			`// - Runes which could be matched on CSS error recovery of a previously malformed token, such as '@'`
			`// and ':'. See http://www.w3.org/TR/css3-syntax/#error-handling.`
			var safeRegularPropertyValuePattern = regexp.MustCompile(`^(?:[*/]?(?:[0-9a-zA-Z+-.!#%_ \t]\|$))*$`)

			`// safeEnumPropertyValuePattern matches strings that are safe to use as enumerated property values.`
			`// Specifically, it matches strings that contain only alphabetic and '-' runes.`
			var safeEnumPropertyValuePattern = regexp.MustCompile(`^[a-zA-Z-]*$`)

			`// SanitizeStyleValue escapes s so that it is safe to put between "" to form a CSS <string-token>.`
			`// See syntax at https://www.w3.org/TR/css-syntax-3/#string-token-diagram.`
			`//`
			`// On top of the escape sequences required in <string-token>, this function also escapes`
			`// control runes to minimize the risk of these runes triggering browser-specific bugs.`
			`// Taken from cssEscapeString in safehtml package.`
			`func SanitizeStyleValue(s string) string {`
			`var b bytes.Buffer`
			`b.Grow(len(s))`
			`for _, c := range s {`
			`switch {`
			`case c == '\u0000':`
			`// Replace the NULL byte according to https://www.w3.org/TR/css-syntax-3/#input-preprocessing.`
			`// We take this extra precaution in case the user agent fails to handle NULL properly.`
			`b.WriteString("\uFFFD")`
			case c == '<', // Prevents breaking out of a style element with `</style>`. Escape this in case the Style user forgets to.
			`c == '"', c == '\\', // Must be CSS-escaped in <string-token>. U+000A line feed is handled in the next case.`
			`c <= '\u001F', c == '\u007F', // C0 control codes`
			`c >= '\u0080' && c <= '\u009F', // C1 control codes`
			`c == '\u2028', c == '\u2029': // Unicode newline characters`
			`// See CSS escape sequence syntax at https://www.w3.org/TR/css-syntax-3/#escape-diagram.`
			`fmt.Fprintf(&b, "\\%06X", c)`
			`default:`
			`b.WriteRune(c)`
			`}`
			`}`
			`return b.String()`
			`}`