Skip to content

Commit

Permalink
compute max parseable fragment and prune LevFSA
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Oct 25, 2024
1 parent 054918c commit 19322b3
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 32 deletions.
33 changes: 17 additions & 16 deletions src/commonMain/kotlin/ai/hypergraph/kaliningraph/automata/FSA.kt
Original file line number Diff line number Diff line change
Expand Up @@ -81,22 +81,23 @@ open class FSA(open val Q: TSA, open val init: Set<Σᐩ>, open val final: Set<
/**
 * Returns the Parikh vector of the [levString] slice `[from, to)`.
 *
 * Results are memoized in the `parikhVector` map under the key `from..to`, so the
 * slice is only summarized the first time a given pair of bounds is requested.
 */
fun parikhVector(from: Int, to: Int): ParikhVector {
  val span = from..to
  return parikhVector.getOrPut(span) {
    val slice = levString.subList(from, to)
    slice.parikhVector()
  }
}

val levString: List<Σᐩ> by lazy {
val t = stateCoords.filter { it.π3 == 0 }.maxOf { it.π2 }
val maxY = stateCoords.maxOf { it.π3 }
val pad = (t * maxY).toString().length
// println("Max state: $t")
val padY = "0".padStart(pad, '0')
(0..<t).map { "q_${it.toString().padStart(pad, '0')}/$padY" to "q_${(it+1).toString().padStart(pad, '0')}/$padY" }
.map { (a, b) ->
val lbl = edgeLabels[a to b]
// if (lbl == null) {
// println("Failed to lookup: $a to $b")
// println(edgeLabels)
// }
lbl!!
}
}
/**
 * Token sequence associated with this automaton. Starts out empty and is mutable so
 * that a builder can assign it after construction (the list-based `makeLevFSA` sets
 * it to its input tokens) instead of recovering it from the edge labels.
 */
var levString: List<Σᐩ> = emptyList()
// by lazy {
// val t = stateCoords.filter { it.π3 == 0 }.maxOf { it.π2 }
// val maxY = stateCoords.maxOf { it.π3 }
// val pad = (t * maxY).toString().length
//// println("Max state: $t")
// val padY = "0".padStart(pad, '0')
// (0..<t).map { "q_${it.toString().padStart(pad, '0')}/$padY" to "q_${(it+1).toString().padStart(pad, '0')}/$padY" }
// .map { (a, b) ->
// val lbl = edgeLabels[a to b]
//// if (lbl == null) {
//// println("Failed to lookup: $a to $b")
//// println(edgeLabels)
//// }
// lbl!!
// }
// }

fun walk(from: Σᐩ, next: (Σᐩ, List<Σᐩ>) -> Int): List<Σᐩ> {
val startVtx = from
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ fun pruneInactiveRules(cfg: CFG): CFG =
// Not implemented yet. The two message fragments are joined with an explicit
// space so the text reads "...generating a finite language..." rather than
// "...generatinga finite language...".
TODO("Identify and prune all nonterminals t generating " +
  "a finite language rooted at t and disjoint from the upward closure.")

/**
 * Finds the shortest prefix of [tokens] that can no longer be completed into a
 * member of this grammar's language.
 *
 * For each prefix length `i` in `1..tokens.size`, the first `i` tokens are kept,
 * every remaining position is replaced with `"_"` (presumably the hole marker —
 * confirm against `HOLE_MARKER`), and [padRight] additional `"_"` tokens are
 * appended. The first `i` whose masked string is not in `language` is returned.
 *
 * @param tokens the tokenized input to probe.
 * @param padRight number of extra `"_"` tokens appended after the masked input.
 * @return the first prefix length whose masked form is rejected, or `tokens.size`
 *   when no prefix is rejected (including when [tokens] is empty). Previously this
 *   case threw [NoSuchElementException].
 */
fun CFG.maxParsableFragment(tokens: List<String>, padRight: Int = 3): Int =
  (1..tokens.size).firstOrNull { i ->
    val blocked = tokens.mapIndexed { j, t -> if (j < i) t else "_" } + List(padRight) { "_" }
    blocked !in language
  } ?: tokens.size // every prefix is completable: treat the whole input as good

// REL ⊂ CFL ⊂ CJL
/** Membership test for a regular language: converts `reg` to a CFG and checks [s] against that CFG's language. */
operator fun REL.contains(s: Σᐩ): Bln {
  val asContextFree = reg.asCFG.language
  return s in asContextFree
}
/** Membership test for a context-free language: delegates to `cfg.isValid`. */
operator fun CFL.contains(s: Σᐩ): Bln {
  return cfg.isValid(s)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,33 +53,41 @@ fun CFG.levenshteinRepair(maxDist: Int, unparseable: List<Σᐩ>, solver: CJL.(L

/**
 * Convenience overload: splits [str] on whitespace and builds the Levenshtein
 * automaton for the resulting token list with edit distance [dist].
 */
fun makeLevFSA(str: Σᐩ, dist: Int): FSA {
  val tokens = str.tokenizeByWhitespace()
  return makeLevFSA(tokens, dist)
}

/**
 * Parses a state name of the form `<prefix>_<i>/<j>` into its two integer
 * coordinates: everything after the first `_` is split on `/`, and the first two
 * pieces are converted with `toInt()`.
 *
 * @return the pair `(i, j)`.
 */
fun Σᐩ.unpackCoordinates(): Pair<Int, Int> {
  // Destructure first, then convert, so a malformed name fails in the same order as before.
  val (first, second) = substringAfter('_').split('/')
  return first.toInt() to second.toInt()
}

/** Uses nominal arc predicates. See [NOM] for denominalization. */
fun makeLevFSA(
str: List<Σᐩ>,
dist: Int,
digits: Int = (str.size * dist).toString().length,
lastGoodIndex: Int = str.size
): FSA =
(upArcs(str, dist, digits) +
diagArcs(str, dist, digits) +
str.mapIndexed { i, it -> rightArcs(i, dist, it, digits) }.flatten() +
str.mapIndexed { i, it -> knightArcs(i, dist, it, digits, str) }.flatten())
.let { Q ->
val initialStates = setOf("q_" + pd(0, digits).let { "$it/$it" })
fun Σᐩ.unpackCoordinates() =
substringAfter('_').split('/')
.let { (i, j) -> i.toInt() to j.toInt() }

val finalStates = mutableSetOf<String>()
Q.states.forEach {
val (i, j) = it.unpackCoordinates()
if ((str.size - i + j).absoluteValue <= dist) finalStates.add(it)
.also {
println("Levenshtein-${str.size}x$dist automaton had ${it.size} arcs initially!")
}.filter { arc ->
arc.first.unpackCoordinates().let { (i, j) -> 0 < j || i <= lastGoodIndex + 1 }
}
.let { Q ->
val initialStates = setOf("q_" + pd(0, digits).let { "$it/$it" })

FSA(Q, initialStates, finalStates)
.also { it.height = dist; it.width = str.size }
// .nominalize()
.also { println("Levenshtein-${str.size}x$dist automaton has ${Q.size} arcs!") }
}

val finalStates = mutableSetOf<String>()
Q.states.forEach {
val (i, j) = it.unpackCoordinates()
if ((str.size - i + j).absoluteValue <= dist) finalStates.add(it)
}

FSA(Q, initialStates, finalStates)
.also { it.height = dist; it.width = str.size; it.levString = str }
// .nominalize()
.also { println("Levenshtein-${str.size}x$dist automaton had ${Q.size} arcs finally!") }
}

/** Renders [i] in decimal, left-padded with `'0'` to at least [digits] characters. */
private fun pd(i: Int, digits: Int): String {
  val decimal = i.toString()
  return decimal.padStart(digits, '0')
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,8 @@ fun CFG.initialUTBMatrix(
): UTMatrix<Blns> =
UTMatrix(
ts = tokens.map { it ->
bmp[listOf(it)].let { nts -> allNTs.map { it in nts } }.toBooleanArray()
if (it == HOLE_MARKER) BooleanArray(nonterminals.size) { true }
else bmp[listOf(it)].let { nts -> allNTs.map { it in nts } }.toBooleanArray()
}.toTypedArray(),
algebra = bitwiseAlgebra
)
Expand Down

0 comments on commit 19322b3

Please sign in to comment.