243 lines
6.0 KiB
Go
243 lines
6.0 KiB
Go
package complexity
|
|
|
|
import (
|
|
"io"
|
|
"regexp"
|
|
"regexp/syntax"
|
|
"slices"
|
|
"strings"
|
|
"unicode/utf8"
|
|
|
|
"golang.org/x/text/transform"
|
|
)
|
|
|
|
// secretKeyPrefixes lists well-known literal prefixes of API keys and access
// tokens; a token starting with one of these is held to the lower
// keyPrefixThreshold when deciding whether to censor it.
// NOTE(review): the vendor groupings below are presumed from the prefix
// shapes (e.g. ghp_/github_pat_ look like GitHub, xoxb- like Slack) — confirm.
var secretKeyPrefixes = []string{
	`sk-`, `pk-`, `rk-`,
	`ghp_`, `gho_`, `ghu_`, `ghs_`, `github_pat_`,
	`glpat-`, `glptt-`, `gldt-`,
	`xoxb-`, `xoxp-`, `xoxa-`, `xoxr-`,
	`ya29.`, `1//`,
	`SG.`, `key-`, `token-`, `api_`, `pat_`,
}
|
|
|
|
func makeSecretKeyPrefixPattern() string {
|
|
var b strings.Builder
|
|
b.WriteString(`(?:`)
|
|
for i, prefix := range secretKeyPrefixes {
|
|
if i != 0 {
|
|
b.WriteRune('|')
|
|
}
|
|
b.WriteString(regexp.QuoteMeta(prefix))
|
|
}
|
|
b.WriteRune(')')
|
|
return b.String()
|
|
}
|
|
|
|
func maxPrefixLength() (sz int) {
|
|
for _, prefix := range secretKeyPrefixes {
|
|
sz = max(sz, len(prefix))
|
|
}
|
|
return
|
|
}
|
|
|
|
const (
	// minKeyLength is the minimum number of characters required after a
	// known prefix (and the minimum token length overall) for a candidate
	// secret; it is baked into the `{8,}` repetitions of rePossibleKey.
	minKeyLength = 8

	// sensitiveDataThreshold is the mean per-byte entropy above which an
	// un-prefixed token is censored: a blend sitting 80% of the way from
	// plain text toward leaked-password entropy plus 20% of the gap toward
	// random-base64 entropy. The referenced *MeanEntropy constants are
	// defined elsewhere in this package.
	sensitiveDataThreshold = textCorpusMeanEntropy + (leakedPasswordMeanEntropy-textCorpusMeanEntropy)*0.8 + (randomBase64MeanEntropy-textCorpusMeanEntropy)*0.2

	// keyPrefixThreshold is the lower bar applied when a token starts with a
	// known secret prefix: halfway between plain text and
	// sensitiveDataThreshold, since the prefix itself is already evidence.
	keyPrefixThreshold = textCorpusMeanEntropy + (sensitiveDataThreshold-textCorpusMeanEntropy)*0.5

	// censoredStr is what replaces the censored portion of a detected secret.
	censoredStr = "********"
)
|
|
|
|
var (
	// maxSafeBlindCopySize is the most trailing bytes that could still grow
	// into a prefix+key match if more input arrived; the streaming
	// transformer holds back this many bytes at a non-EOF chunk boundary.
	maxSafeBlindCopySize = maxPrefixLength() + minKeyLength

	// rePossibleKey matches either a known secret prefix followed by 8+
	// key-alphabet characters, or any run of 8+ non-space characters
	// starting and ending at a word boundary.
	rePossibleKey = regexp.MustCompile(`\b` + makeSecretKeyPrefixPattern() + `[\w=+,\-]{8,}|\b[^\s]{8,}\b`)

	// reSecretKeyPrefix matches a known secret prefix anchored at the start
	// of an already-extracted candidate fragment.
	reSecretKeyPrefix = regexp.MustCompile(`^` + makeSecretKeyPrefixPattern())

	// censoredBytes is censoredStr pre-converted for the []byte code paths.
	censoredBytes = []byte(censoredStr)
)
|
|
|
|
func replaceLikelyKeyBytes(fragment []byte) []byte {
|
|
if match := reSecretKeyPrefix.Find(fragment); len(match) > 0 {
|
|
entropy, cnt := Bytes(fragment[len(match):])
|
|
|
|
if entropy/float32(cnt) >= keyPrefixThreshold {
|
|
return append(slices.Clip(match), censoredStr...)
|
|
}
|
|
} else {
|
|
entropy, cnt := Bytes(fragment)
|
|
if entropy/float32(cnt) >= sensitiveDataThreshold {
|
|
return censoredBytes
|
|
}
|
|
}
|
|
|
|
return fragment
|
|
}
|
|
|
|
func replaceLikelyKey(fragment string) string {
|
|
if match := reSecretKeyPrefix.FindString(fragment); match != "" {
|
|
entropy, cnt := String(fragment[len(match):])
|
|
|
|
if entropy/float32(cnt) >= keyPrefixThreshold {
|
|
return match + censoredStr
|
|
}
|
|
} else {
|
|
entropy, cnt := String(fragment)
|
|
if entropy/float32(cnt) >= sensitiveDataThreshold {
|
|
return censoredStr
|
|
}
|
|
}
|
|
|
|
return fragment
|
|
}
|
|
|
|
// CensorLikelyKeysBytes returns text with every token that looks like a
// secret key (per rePossibleKey and the entropy thresholds) censored.
func CensorLikelyKeysBytes(text []byte) []byte {
	return rePossibleKey.ReplaceAllFunc(text, replaceLikelyKeyBytes)
}
|
|
|
|
// CensorLikelyKeys returns text with every token that looks like a secret
// key (per rePossibleKey and the entropy thresholds) censored.
func CensorLikelyKeys(text string) string {
	return rePossibleKey.ReplaceAllStringFunc(text, replaceLikelyKey)
}
|
|
|
|
// tf is the stateless type backing the package-level [Transformer].
type tf struct{}
|
|
|
|
// trimIncompleteLastRune returns text cut back so that it ends on a complete
// UTF-8 rune, dropping any trailing bytes that form a truncated or invalid
// final sequence.
func trimIncompleteLastRune(text []byte) []byte {
	// Walk back from the final byte to the start of the last encoded rune.
	start := max(0, len(text)-1)
	for start > 0 && !utf8.RuneStart(text[start]) {
		start--
	}

	r, size := utf8.DecodeRune(text[start:])
	if size == 1 && r == utf8.RuneError {
		// Truncated or invalid sequence: cut it off entirely.
		return text[:start]
	}
	// The final rune decoded cleanly; keep it.
	return text[:start+size]
}
|
|
|
|
// trimLastWordBoundaryNextRuneKnown trims trailing runes off text until it
// ends at a word boundary, given that the rune immediately following text is
// known to be last. Runes sharing last's word-character class are removed;
// if no boundary exists the empty prefix is returned.
func trimLastWordBoundaryNextRuneKnown(text []byte, last rune) []byte {
	// The loop below only continues while classes match, so the class we
	// compare against never changes — compute it once.
	sameClass := syntax.IsWordChar(last)

	end := len(text)
	for end > 0 {
		r, n := utf8.DecodeLastRune(text[:end])
		if syntax.IsWordChar(r) != sameClass {
			// Class change between r and its successor: a word boundary.
			break
		}
		end -= n
	}
	return text[:end]
}
|
|
|
|
func trimLastWordBoundary(text []byte) []byte {
|
|
last, sz := utf8.DecodeLastRune(text)
|
|
return trimLastWordBoundaryNextRuneKnown(text[:len(text)-sz], last)
|
|
}
|
|
|
|
// assumes src already ends on a valid cut point
|
|
// we can't determine that ourselves without knowing the bytes that follow.
|
|
// if dst is shorter than src, then src is reduced to the same length, and
|
|
// the possibly incomplete rune and final word boundary is removed prior to copying
|
|
func copyCompleteWords(dst, src []byte) int {
|
|
if len(dst) < len(src) {
|
|
_src := trimIncompleteLastRune(src[:len(dst)])
|
|
lastRune, _ := utf8.DecodeRune(src[len(_src):])
|
|
src = trimLastWordBoundaryNextRuneKnown(_src, lastRune)
|
|
}
|
|
|
|
return copy(dst, src)
|
|
}
|
|
|
|
// Transform implements [transform.Transformer.Transform]: it copies src to
// dst, replacing the high-entropy portion of each likely secret key with
// censoredStr. When atEOF is false, trailing bytes whose classification could
// change once more input arrives are deliberately held back and ErrShortSrc
// is returned so the caller supplies more data.
//
// NOTE(review): this duplicates the classification logic of
// replaceLikelyKeyBytes inline; the package-level warning says its output is
// known to diverge from [CensorLikelyKeys] — the discrepancy is an open bug.
func (*tf) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	if !atEOF {
		// make sure the last rune hasn't been cut off.
		src = trimIncompleteLastRune(src)
	}

	matches := rePossibleKey.FindAllIndex(src, -1)

	if !atEOF {
		if len(matches) > 0 {
			// ignore the last match; what matches might change if we had more bytes available.
			// we'll also adjust the length of src so that we don't try to copy past where the last match begins
			last := len(matches) - 1
			src = src[:matches[last][0]]
			matches = matches[:last]
		} else {
			// ignore the last maxSafeBlindCopySize bytes,
			// as they could potentially become a match.
			_src := trimIncompleteLastRune(src[:max(0, len(src)-maxSafeBlindCopySize)])
			last, _ := utf8.DecodeRune(src[len(_src):])
			src = trimLastWordBoundaryNextRuneKnown(_src, last)
		}
	}

	for _, m := range matches {
		// Copy the non-matching text ahead of this match; if dst fills before
		// we reach the match start, report short destination.
		sz := copyCompleteWords(dst[nDst:], src[nSrc:m[0]])
		nDst += sz
		nSrc += sz
		if nSrc != m[0] {
			err = transform.ErrShortDst
			return
		}

		// Same classification as replaceLikelyKeyBytes, done inline so we can
		// check dst capacity before writing anything.
		match := reSecretKeyPrefix.Find(src[m[0]:m[1]])
		censor := true

		if len(match) > 0 {
			// Known prefix: measure only the part after the prefix against
			// the lower threshold.
			entropy, cnt := Bytes(src[m[0]+len(match) : m[1]])
			censor = entropy/float32(cnt) >= keyPrefixThreshold
		} else {
			entropy, cnt := Bytes(src[m[0]:m[1]])
			censor = entropy/float32(cnt) >= sensitiveDataThreshold
		}

		if censor {
			// Emit the recognizable prefix (if any) followed by the censor
			// marker; the match is either written whole or not at all.
			if len(dst[nDst:]) < len(match)+len(censoredBytes) {
				err = transform.ErrShortDst
				return
			}

			nDst += copy(dst[nDst:], match)
			nDst += copy(dst[nDst:], censoredBytes)
			nSrc = m[1]
		} else {
			// Below threshold: pass the candidate through unchanged.
			if len(dst[nDst:]) < m[1]-m[0] {
				err = transform.ErrShortDst
				return
			}

			nDst += copy(dst[nDst:], src[m[0]:m[1]])
			nSrc = m[1]
		}
	}

	// Copy whatever remains after the final match.
	sz := copyCompleteWords(dst[nDst:], src[nSrc:])
	nDst += sz
	nSrc += sz

	if nSrc != len(src) {
		err = transform.ErrShortDst
	} else if !atEOF {
		// We held bytes back above; ask the caller for more input.
		err = transform.ErrShortSrc
	}

	return
}
|
|
|
|
// Reset implements [transform.Transformer]. tf carries no state between
// Transform calls, so there is nothing to reset.
func (*tf) Reset() {
	// NOP
}
|
|
|
|
// Transformer implements [transform.Transformer], censoring any likely keys it encounters.
//
// WARNING: the streaming implementation is known to produce output that can
// differ from [CensorLikelyKeys]; treat the discrepancy as an open bug.
var Transformer transform.Transformer = (*tf)(nil)
|
|
|
|
// Copy copies r to w, censoring any likely keys it encounters.
|
|
//
|
|
// WARNING: The current implementation doesn't match the output of [CensorLikelyKeys], so I definitely
|
|
// have a bug somewhere.
|
|
func Copy(w io.Writer, r io.Reader) error {
|
|
_, err := io.Copy(w, transform.NewReader(r, Transformer))
|
|
return err
|
|
}
|