Files
complexity/gen.go
2025-08-13 12:17:25 -04:00

113 lines
2.3 KiB
Go

//go:build ignore
package main
import (
"bufio"
"fmt"
"io"
"log"
"math"
"os"
"regexp/syntax"
)
// The transition table covers the runes runeStart..runeLast inclusive.
const (
// runeStart is the first tracked rune (32, the ASCII space).
runeStart rune = 32
// runeLast is the last tracked rune (127). getCounts also uses it as the
// stand-in for any rune outside the tracked range and as the "previous rune"
// before any input has been read.
runeLast rune = 127
// sz is the number of tracked runes, i.e. the side length of the square
// transition matrices, which are stored flattened as [sz * sz] arrays.
sz = int(runeLast + 1 - runeStart)
)
// getCounts reads runes from r until EOF and tallies how often each rune
// follows each other rune.
//
// The result is a flattened sz-by-sz matrix: the row is the previous rune and
// the column is the current rune, both offset by runeStart. Runes outside
// runeStart..runeLast are folded onto runeLast, which is also the "previous
// rune" assumed before the first rune is read.
//
// A read error other than io.EOF is returned together with the counts
// gathered up to that point.
func getCounts(r io.Reader) (matrix [sz * sz]uint64, err error) {
	src := bufio.NewReader(r)
	last := runeLast
	for {
		cur, _, readErr := src.ReadRune()
		switch {
		case readErr == io.EOF:
			return matrix, nil
		case readErr != nil:
			return matrix, readErr
		}

		// Anything outside the tracked range is treated as runeLast.
		if !(runeStart <= cur && cur <= runeLast) {
			cur = runeLast
		}

		col := int(cur - runeStart)
		matrix[int(last-runeStart)*sz+col]++

		// When moving from a non-word rune to a word rune, also train the
		// transition as if the previous rune had been runeLast: the start of a
		// word is a sensible way to start a piece of random text. The
		// last != runeLast guard avoids counting that transition twice.
		startsWord := last != runeLast && !syntax.IsWordChar(last) && syntax.IsWordChar(cur)
		if startsWord {
			matrix[(sz-1)*sz+col]++
		}

		last = cur
	}
}
// calcEntropy turns a flattened sz-by-sz matrix of transition counts into a
// matrix of -log2(p) values, where p is a smoothed estimate of the
// probability of moving from the row rune to the column rune.
//
// With c the cell count, rowTotal the sum of its row, colTotal the sum of its
// column over the whole matrix and total the sum of all cells, the estimate
// is algebraically
//
//	p = (c + (colTotal+0.5)/(total+1)) / (rowTotal+1)
//
// so every cell gets a small non-zero share derived from how common the
// column rune is overall. If p ever falls outside the open interval (0, 1)
// the inputs are logged and the program exits via log.Fatalf.
func calcEntropy(in [sz * sz]uint64) (out [sz * sz]float64) {
	var (
		colTotal [sz]uint64
		total    uint64
	)
	for idx, n := range in {
		total += n
		colTotal[idx%sz] += n
	}

	// Common factor that clears the fractions in the formula above.
	scale := float64(2 * (total + 1))

	for row := 0; row < sz; row++ {
		cells := in[row*sz : (row+1)*sz]

		var rowTotal uint64
		for _, n := range cells {
			rowTotal += n
		}

		for column, c := range cells {
			p := (scale*float64(c) + float64(2*colTotal[column]+1)) / (scale * float64(rowTotal+1))
			if p <= 0 || p >= 1 {
				log.Printf("c=%d fallbackSum=%d fallbackCount[column]=%d sum=%d\n", c, total, colTotal[column], rowTotal)
				log.Fatalf("got p=%f for %c->%c\n", p, runeStart+rune(row), runeStart+rune(column))
			}
			out[row*sz+column] = -math.Log2(p)
		}
	}
	return out
}
// calcMeanEntropy returns the average of entropies weighted by counts: the
// sum of entropies[i]*counts[i] divided by the sum of counts.
//
// If every count is zero the division is 0/0 and the result is NaN.
func calcMeanEntropy(counts [sz * sz]uint64, entropies [sz * sz]float64) float64 {
	var (
		weighted float64
		total    uint64
	)
	for i := range counts {
		n := counts[i]
		total += n
		weighted += entropies[i] * float64(n)
	}
	return weighted / float64(total)
}
// main reads training text from standard input, builds the transition counts
// and their -log2(p) matrix, and prints Go source to standard output that
// declares the matrix as a [sz*sz]float32 literal followed by the
// count-weighted mean as the constant meanEntropy.
//
// The program exits with a non-zero status if reading the input fails, if the
// input is empty (the mean would be NaN, which is not a valid Go constant), or
// if writing the output fails.
func main() {
	cnts, err := getCounts(os.Stdin)
	if err != nil {
		log.Fatal(err)
	}
	entropy := calcEntropy(cnts)

	// With no input every count is zero and the weighted mean is 0/0. Refuse
	// to emit "const meanEntropy = NaN", which would not compile.
	mean := calcMeanEntropy(cnts, entropy)
	if math.IsNaN(mean) {
		log.Fatal("no input: mean entropy is undefined")
	}

	// Buffer the output: the matrix is written as sz*sz small pieces, and
	// bufio.Writer remembers the first write error so that a single check on
	// Flush covers all of them.
	out := bufio.NewWriter(os.Stdout)
	fmt.Fprintf(out, "var matrix = [%d*%d]float32{", sz, sz)
	for row := range sz {
		for i, v := range entropy[row*sz : row*sz+sz] {
			// Each matrix row starts on a new line; cells are space-separated.
			if i == 0 {
				out.WriteString("\n ")
			} else {
				out.WriteString(" ")
			}
			fmt.Fprintf(out, "%f,", v)
		}
	}
	fmt.Fprintf(out, "}\n\nconst meanEntropy = %f\n", mean)
	if err := out.Flush(); err != nil {
		log.Fatal(err)
	}
}