Files
complexity/complexity.go

80 lines
1.5 KiB
Go
Raw Permalink Normal View History

2025-08-13 12:17:25 -04:00
// Complexity performs some rudimentary entropy calculations based on ASCII bi-grams, which can be
// used to censor potential secret keys or passwords in text, and to estimate password strength.
//
// The matrix it uses only covers printable ASCII text; all other runes are treated as \x7f (code point 127).
//
// The package isn't well tested, and the source data the matrix tabe
package complexity
import (
"bufio"
"io"
"unicode/utf8"
)
// state is the running accumulator used while scanning a sequence of runes.
type state struct {
// prev is the most recently consumed rune; put pairs it with the next one.
prev rune
// count is the number of runes passed to put so far.
count int64
// entropy is the running sum of the matrix entries looked up by put.
entropy float64
}
// runeIdx converts a rune into its row/column index in the bi-gram matrix.
//
// Runes within [firstRune, lastRune] map to their offset from firstRune;
// every other rune shares the final index, sz-1.
func runeIdx(r rune) int {
	if firstRune <= r && r <= lastRune {
		return int(r - firstRune)
	}
	return sz - 1
}
// put consumes one rune: it adds the matrix entry for the (previous, next)
// rune pair to the running total, then records next as the new previous rune
// and bumps the rune counter.
func (s *state) put(next rune) {
	row, col := runeIdx(s.prev), runeIdx(next)
	// The matrix is stored flat, sz entries per row.
	s.entropy += float64(matrix[row*sz+col])
	s.prev = next
	s.count++
}
// runeReader feeds every rune read from r into a fresh accumulator until
// ReadRune reports an error.
//
// It returns the accumulated total (narrowed to float32), the number of runes
// consumed, and the error that stopped the scan. io.EOF is the normal end of
// input and is reported as a nil error; for any other error the totals
// gathered so far are still returned alongside it.
func runeReader(r io.RuneReader) (float32, int64, error) {
	acc := state{prev: lastRune}
	for {
		next, _, err := r.ReadRune()
		switch {
		case err == nil:
			acc.put(next)
		case err == io.EOF:
			return float32(acc.entropy), acc.count, nil
		default:
			return float32(acc.entropy), acc.count, err
		}
	}
}
// Reader scans all runes available from r and returns the accumulated total,
// the number of runes read, and any read error other than io.EOF.
//
// If r already implements io.RuneReader it is used directly; otherwise it is
// wrapped in a bufio.Reader to obtain rune-wise reads.
func Reader(r io.Reader) (float32, int64, error) {
	rr, ok := r.(io.RuneReader)
	if !ok {
		rr = bufio.NewReader(r)
	}
	return runeReader(rr)
}
// Bytes scans text as UTF-8 and returns the accumulated total (narrowed to
// float32) together with the number of runes decoded.
//
// Decoding follows utf8.DecodeRune, so each byte of an invalid sequence is
// consumed on its own as utf8.RuneError.
func Bytes(text []byte) (float32, int) {
	acc := state{prev: lastRune}
	// Peel one rune at a time off the front of the remaining input.
	for rest := text; len(rest) > 0; {
		r, width := utf8.DecodeRune(rest)
		rest = rest[width:]
		acc.put(r)
	}
	return float32(acc.entropy), int(acc.count)
}
// String scans the runes of text and returns the accumulated total (narrowed
// to float32) together with the number of runes visited.
func String(text string) (float32, int) {
	var acc state
	acc.prev = lastRune

	// Ranging over a string yields runes, not bytes.
	for _, r := range text {
		acc.put(r)
	}

	total, runes := float32(acc.entropy), int(acc.count)
	return total, runes
}