package complexity

import (
	"io"
	"regexp"
	"regexp/syntax"
	"slices"
	"strings"
	"unicode/utf8"

	"golang.org/x/text/transform"
)

// secretKeyPrefixes lists string prefixes that commonly mark secret keys and
// access tokens (the names suggest GitHub, GitLab, Slack, Google OAuth,
// SendGrid and a few generic schemes). A fragment starting with one of these
// is judged against a lower entropy threshold before being censored.
// NOTE: order matters — it becomes a leftmost-first regexp alternation
// (e.g. `github_pat_` must be tried before `pat_`).
var secretKeyPrefixes = []string{
	`sk-`, `pk-`, `rk-`,
	`ghp_`, `gho_`, `ghu_`, `ghs_`, `github_pat_`,
	`glpat-`, `glptt-`, `gldt-`,
	`xoxb-`, `xoxp-`, `xoxa-`, `xoxr-`,
	`ya29.`, `1//`,
	`SG.`,
	`key-`, `token-`, `api_`, `pat_`,
}

// makeSecretKeyPrefixPattern renders secretKeyPrefixes as a non-capturing
// regexp alternation, `(?:p1|p2|…)`, with each prefix quoted literally.
func makeSecretKeyPrefixPattern() string {
	quoted := make([]string, len(secretKeyPrefixes))
	for i, p := range secretKeyPrefixes {
		quoted[i] = regexp.QuoteMeta(p)
	}
	return `(?:` + strings.Join(quoted, `|`) + `)`
}

// maxPrefixLength reports the byte length of the longest entry in
// secretKeyPrefixes.
func maxPrefixLength() int {
	longest := 0
	for _, prefix := range secretKeyPrefixes {
		longest = max(longest, len(prefix))
	}
	return longest
}

const (
	// minKeyLength is the minimum number of bytes a fragment needs before it
	// can be considered a potential key.
	minKeyLength = 8

	// sensitiveDataThreshold is the mean per-byte entropy above which an
	// arbitrary word is censored: 80% of the way from ordinary text towards
	// leaked passwords plus 20% of the way towards random base64. (The mean
	// entropy reference constants are declared elsewhere in this package.)
	sensitiveDataThreshold = textCorpusMeanEntropy +
		(leakedPasswordMeanEntropy-textCorpusMeanEntropy)*0.8 +
		(randomBase64MeanEntropy-textCorpusMeanEntropy)*0.2

	// keyPrefixThreshold is the lower bar applied to the part of a fragment
	// that follows a known secret-key prefix: halfway between ordinary text
	// and sensitiveDataThreshold.
	keyPrefixThreshold = textCorpusMeanEntropy + (sensitiveDataThreshold-textCorpusMeanEntropy)*0.5

	// censoredStr replaces the censored portion of a fragment.
	censoredStr = "********"
)

var (
	// maxSafeBlindCopySize is the most bytes a prefixed key match could need
	// before it can be ruled out: longest known prefix + minimum key length.
	maxSafeBlindCopySize = maxPrefixLength() + minKeyLength

	// rePossibleKey matches candidate fragments: a known prefix followed by
	// 8+ key-ish characters, or any run of 8+ non-whitespace characters
	// delimited by word boundaries.
	rePossibleKey = regexp.MustCompile(`\b` + makeSecretKeyPrefixPattern() + `[\w=+,\-]{8,}|\b[^\s]{8,}\b`)

	// reSecretKeyPrefix matches a known secret-key prefix anchored at the
	// start of a fragment.
	reSecretKeyPrefix = regexp.MustCompile(`^` + makeSecretKeyPrefixPattern())

	// censoredBytes is censoredStr for the []byte code paths.
	censoredBytes = []byte(censoredStr)
)

// replaceLikelyKeyBytes decides whether a single rePossibleKey match looks
// like a secret and returns either a censored replacement or fragment
// unchanged. It is the callback for ReplaceAllFunc.
func replaceLikelyKeyBytes(fragment []byte) []byte {
	if prefix := reSecretKeyPrefix.Find(fragment); len(prefix) > 0 {
		// Known key prefix: judge only the bytes after it, against the lower
		// threshold, and keep the prefix itself visible in the output.
		sum, n := Bytes(fragment[len(prefix):])
		if sum/float32(n) >= keyPrefixThreshold {
			// Clip so append allocates instead of scribbling past the match
			// into the surrounding buffer.
			return append(slices.Clip(prefix), censoredStr...)
		}
		return fragment
	}
	// No recognized prefix: the whole fragment must clear the stricter bar.
	sum, n := Bytes(fragment)
	if sum/float32(n) >= sensitiveDataThreshold {
		return censoredBytes
	}
	return fragment
}

// replaceLikelyKey is the string twin of replaceLikelyKeyBytes.
func replaceLikelyKey(fragment string) string {
	if prefix := reSecretKeyPrefix.FindString(fragment); prefix != "" {
		sum, n := String(fragment[len(prefix):])
		if sum/float32(n) >= keyPrefixThreshold {
			return prefix + censoredStr
		}
		return fragment
	}
	sum, n := String(fragment)
	if sum/float32(n) >= sensitiveDataThreshold {
		return censoredStr
	}
	return fragment
}

// CensorLikelyKeysBytes returns text with every fragment that looks like a
// secret key replaced by a censored marker.
func CensorLikelyKeysBytes(text []byte) []byte {
	return rePossibleKey.ReplaceAllFunc(text, replaceLikelyKeyBytes)
}

// CensorLikelyKeys is the string twin of CensorLikelyKeysBytes.
func CensorLikelyKeys(text string) string {
	return rePossibleKey.ReplaceAllStringFunc(text, replaceLikelyKey)
}

// tf is the stateless transform.Transformer behind Transformer.
type tf struct{}

// trimIncompleteLastRune drops any trailing bytes of text that form a
// truncated UTF-8 sequence, so the result ends on a rune boundary.
func trimIncompleteLastRune(text []byte) []byte {
	start := max(0, len(text)-1)
	// Walk back over continuation bytes to the start of the final rune.
	for start > 0 && !utf8.RuneStart(text[start]) {
		start--
	}
	r, size := utf8.DecodeRune(text[start:])
	if size == 1 && r == utf8.RuneError {
		// Incomplete (or invalid) trailing sequence: cut it off.
		return text[:start]
	}
	return text[:start+size]
}

// trimLastWordBoundaryNextRuneKnown trims text back to a word boundary,
// given that the rune immediately following text is next. It removes the
// trailing run of runes whose word-ness (\w vs non-\w, per regexp/syntax)
// matches that of next, so the cut lands where \b would match.
func trimLastWordBoundaryNextRuneKnown(text []byte, next rune) []byte {
	nextIsWord := syntax.IsWordChar(next)
	end := len(text)
	for end > 0 {
		r, size := utf8.DecodeLastRune(text[:end])
		if syntax.IsWordChar(r) != nextIsWord {
			break // word-ness flips here: this is a \b position
		}
		end -= size
	}
	return text[:end]
}

// trimLastWordBoundary trims text back to its last internal word boundary.
func trimLastWordBoundary(text []byte) []byte {
	last, size := utf8.DecodeLastRune(text)
	return trimLastWordBoundaryNextRuneKnown(text[:len(text)-size], last)
}

// assumes src already ends on a valid cut point
// we can't determine that ourselves without knowing the bytes that follow.
// if dst is shorter than src, then src is reduced to the same length, and
// the possibly incomplete rune and final word boundary is removed prior to copying.
//
// copyCompleteWords copies as much of src into dst as fits while still ending
// on a rune and word boundary, and returns the number of bytes copied (which
// may be less than len(dst)).
func copyCompleteWords(dst, src []byte) int {
	if len(dst) < len(src) {
		// Cut src down to what fits in dst, then back off any split rune.
		_src := trimIncompleteLastRune(src[:len(dst)])
		// The rune that would come next (decoded from the full src, so it is
		// complete) tells us which side of a word boundary the cut landed on.
		lastRune, _ := utf8.DecodeRune(src[len(_src):])
		src = trimLastWordBoundaryNextRuneKnown(_src, lastRune)
	}
	return copy(dst, src)
}

// Transform implements [transform.Transformer]: it copies src to dst while
// censoring fragments that rePossibleKey matches and the same entropy checks
// as replaceLikelyKeyBytes deem likely keys. When !atEOF it holds back any
// suffix of src whose interpretation could change once more bytes arrive and
// reports ErrShortSrc so the caller re-feeds those bytes.
//
// NOTE(review): each call runs the regexp on its src window in isolation, so
// the \b context at the window edges can disagree with the full text; this
// looks like a plausible cause of the mismatch with [CensorLikelyKeys] that
// the WARNING on Transformer mentions — TODO confirm.
func (*tf) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	if !atEOF {
		// make sure the last rune hasn't been cut off.
		src = trimIncompleteLastRune(src)
	}
	matches := rePossibleKey.FindAllIndex(src, -1)
	if !atEOF {
		if len(matches) > 0 {
			// ignore the last match; what matches might change if we had more bytes available.
			// we'll also adjust the length of src so that we don't try to copy past where the last match begins
			last := len(matches) - 1
			src = src[:matches[last][0]]
			matches = matches[:last]
		} else {
			// ignore the last maxSafeBlindCopySize bytes,
			// as they could potentially become a match.
			_src := trimIncompleteLastRune(src[:max(0, len(src)-maxSafeBlindCopySize)])
			last, _ := utf8.DecodeRune(src[len(_src):])
			src = trimLastWordBoundaryNextRuneKnown(_src, last)
		}
	}
	for _, m := range matches {
		// Blind-copy the gap before this match; if it didn't all fit,
		// dst is exhausted.
		sz := copyCompleteWords(dst[nDst:], src[nSrc:m[0]])
		nDst += sz
		nSrc += sz
		if nSrc != m[0] {
			err = transform.ErrShortDst
			return
		}
		// Same entropy decision as replaceLikelyKeyBytes, inlined here so we
		// can bound the dst space needed before writing.
		match := reSecretKeyPrefix.Find(src[m[0]:m[1]])
		censor := true
		if len(match) > 0 {
			entropy, cnt := Bytes(src[m[0]+len(match) : m[1]])
			censor = entropy/float32(cnt) >= keyPrefixThreshold
		} else {
			entropy, cnt := Bytes(src[m[0]:m[1]])
			censor = entropy/float32(cnt) >= sensitiveDataThreshold
		}
		if censor {
			// Emit the (possibly empty) prefix followed by the censor marker.
			if len(dst[nDst:]) < len(match)+len(censoredBytes) {
				err = transform.ErrShortDst
				return
			}
			nDst += copy(dst[nDst:], match)
			nDst += copy(dst[nDst:], censoredBytes)
			nSrc = m[1]
		} else {
			// Not key-like after all: pass the match through verbatim.
			if len(dst[nDst:]) < m[1]-m[0] {
				err = transform.ErrShortDst
				return
			}
			nDst += copy(dst[nDst:], src[m[0]:m[1]])
			nSrc = m[1]
		}
	}
	// Blind-copy whatever remains after the final match.
	sz := copyCompleteWords(dst[nDst:], src[nSrc:])
	nDst += sz
	nSrc += sz
	if nSrc != len(src) {
		err = transform.ErrShortDst
	} else if !atEOF {
		// src was trimmed above, so even a full copy means bytes were held
		// back: ask for more input.
		err = transform.ErrShortSrc
	}
	return
}

// Reset implements [transform.Transformer]. tf keeps no state between calls,
// so there is nothing to reset.
func (*tf) Reset() {
	// NOP
}

// Transformer implements [transform.Transformer], censoring any likely keys it encounters.
//
// WARNING: The current implementation doesn't match the output of [CensorLikelyKeys], so I definitely
// have a bug somewhere.
var Transformer transform.Transformer = (*tf)(nil)

// Copy copies r to w, censoring any likely keys it encounters.
//
// WARNING: The current implementation doesn't match the output of [CensorLikelyKeys], so I definitely
// have a bug somewhere.
func Copy(w io.Writer, r io.Reader) error {
	_, err := io.Copy(w, transform.NewReader(r, Transformer))
	return err
}