From 8323c9f6a1b52792979b7e4a7afc6f1cb5ff9add Mon Sep 17 00:00:00 2001
From: rowan
Date: Wed, 16 Apr 2025 14:53:46 -0500
Subject: [PATCH] split up parser.js

Move the contents of src/parser.js into one module per concern and
re-export them from src/index.js:

- char.js        single-character and string parsers
- combinator.js  any / anyOf
- cond.js        maybe
- const.js       character class strings
- fn.js          Tuple, fork, succeed, fail and small helpers
- iter.js        Iter, a wrapper class with iterator helper methods
- seq.js         seq / map
- state.js       ParseError and the State constructor

Add vendor/izuna as a submodule (curry is now imported from it), bump
vendor/kojima, and add a jsconfig.json.

Review fixups folded in (all line-count neutral):

- combinator: call result.isOk() like cond.js and seq.js do, and retry
  each alternative from the untouched fork instead of the input that
  the previous attempt already advanced
- seq: map binds on the parser result the same way seq does
- fn: close the @import comment, use the relative izuna path, and make
  diff return no tokens when none were added
- iter: accept an inherited Symbol.iterator (strings, arrays,
  generators), make [Symbol.iterator] a plain method, drop only once
  in DropIter, count source elements in FilterIter and FlatMapIter,
  tolerate iterators without return()
- char: document the state parameter of str as ParserState
---
NOTE(review): parser.js exported `anyChar` and `parse`; no new module
defines them, so they are no longer reachable from src/index.js.
NOTE(review): char.js and combinator.js import each other; this appears
to evaluate cleanly only when char.js is entered first (as index.js does).
NOTE(review): the map fixup assumes kojima's Result#bind hands the
callback the unwrapped state, as seq already relies on - confirm.
NOTE(review): indentation was reconstructed (two spaces, tabs in
.gitmodules) and the index blob ids are those of the original commit -
regenerate the patch from a tree before applying.

 .gitmodules       |   3 +
 jsconfig.json     |  16 ++
 src/char.js       |  36 ++++
 src/combinator.js |  34 ++++
 src/cond.js       |  18 ++
 src/const.js      |   6 +
 src/fn.js         |  99 +++++++++++
 src/index.js      |   6 +-
 src/iter.js       | 406 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/parser.js     | 126 --------------
 src/seq.js        |  55 +++++++
 src/state.js      |  23 +++
 vendor/izuna      |   1 +
 vendor/kojima     |   2 +-
 14 files changed, 703 insertions(+), 128 deletions(-)
 create mode 100644 jsconfig.json
 create mode 100644 src/char.js
 create mode 100644 src/combinator.js
 create mode 100644 src/cond.js
 create mode 100644 src/const.js
 create mode 100644 src/fn.js
 create mode 100644 src/iter.js
 delete mode 100644 src/parser.js
 create mode 100644 src/seq.js
 create mode 100644 src/state.js
 create mode 160000 vendor/izuna

diff --git a/.gitmodules b/.gitmodules
index bcab54b..fbf17c0 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "vendor/kojima"]
 	path = vendor/kojima
 	url = https://git.kitsu.cafe/rowan/kojima.git
+[submodule "vendor/izuna"]
+	path = vendor/izuna
+	url = https://git.kitsu.cafe/rowan/izuna.git
diff --git a/jsconfig.json b/jsconfig.json
new file mode 100644
index 0000000..e74eccc
--- /dev/null
+++ b/jsconfig.json
@@ -0,0 +1,16 @@
+{
+  "compilerOptions": {
+    "strict": true,
+    "module": "es2020",
+    "target": "es6",
+    "lib": ["es2022", "dom"],
+    "checkJs": false,
+    "paths": {
+      "/*": ["./*"]
+    }
+  },
+  "exclude": [
+    "node_modules"
+  ]
+}
+
diff --git a/src/char.js b/src/char.js
new file mode 100644
index 0000000..50b23fc
--- /dev/null
+++ b/src/char.js
@@ -0,0 +1,36 @@
+import { State } from './state.js'
+import { anyOf } from './combinator.js'
+import { Alpha, Alphanumeric, Digits, LowerAlpha, UpperAlpha } from './const.js'
+import { fail, join, mapStr, next, succeed } from './fn.js'
+import { map, seq } from './seq.js'
+import { curry } from '../vendor/izuna/src/curry.js'
+
+/** @import { ParserState } from './state.js' */
+
+export const char = curry(
+  /**
+   * @param {string} ch
+   * @param {ParserState} state
+   */
+  (ch, state) => (
+    next(state) === ch ? succeed(ch, state) : fail(`could not parse ${ch} `, state)
+  ))
+
+export const str = curry(
+  /**
+   * @param {string} str
+   * @param {ParserState} state
+   */
+  (str, state) => (
+    map(
+      join(''),
+      seq(...mapStr(char, str))
+    )(state)
+  ))
+
+export const digit = anyOf(Digits)
+export const lowerAlpha = anyOf(LowerAlpha)
+export const upperAlpha = anyOf(UpperAlpha)
+export const alpha = anyOf(Alpha)
+export const alphanumeric = anyOf(Alphanumeric)
+
diff --git a/src/combinator.js b/src/combinator.js
new file mode 100644
index 0000000..25b665d
--- /dev/null
+++ b/src/combinator.js
@@ -0,0 +1,34 @@
+import { char } from './char.js'
+import { fail, fork, mapStr } from './fn.js'
+import { curry } from '../vendor/izuna/src/curry.js'
+
+/** @import { ParserState } from './state.js' */
+
+/**
+ * @param {...any} parsers
+ */
+export const any = (...parsers) =>
+  /**
+   * @param {ParserState} state
+   */
+  state => {
+    let rest = state
+    for (const parser of parsers) {
+      const [original, clone] = fork(rest)
+      const result = parser(clone)
+      if (result.isOk()) return result
+      rest = original // parsing the clone advanced `rest`; retry from the untouched fork
+    }
+
+    return fail('no matching parsers', rest)
+  }
+
+export const anyOf = curry(
+  /**
+   * @param {string} str
+   * @param {ParserState} state
+   */
+  (str, state) => (
+    any(...mapStr(char, str))(state)
+  ))
+
diff --git a/src/cond.js b/src/cond.js
new file mode 100644
index 0000000..9b32d8d
--- /dev/null
+++ b/src/cond.js
@@ -0,0 +1,18 @@
+import { ParseError } from './state.js'
+import { curry } from '../vendor/izuna/src/index.js'
+import { fork, succeed } from './fn.js'
+
+/** @import { Result } from '../vendor/kojima/src/index.js' */
+/** @import { ParserState } from './state.js' */
+
+export const maybe = curry(
+  /**
+   * @param {(...args: any[]) => Result} parser
+   * @param {ParserState} state
+   */
+  (parser, state) => {
+    const [original, clone] = fork(state)
+    const result = parser(clone)
+    return result.isOk() ? result : succeed([], original)
+  })
+
diff --git a/src/const.js b/src/const.js
new file mode 100644
index 0000000..f51d393
--- /dev/null
+++ b/src/const.js
@@ -0,0 +1,6 @@
+export const LowerAlpha = 'abcdefghijklmnopqrstuvwxyz'
+export const UpperAlpha = LowerAlpha.toUpperCase()
+export const Alpha = LowerAlpha + UpperAlpha
+export const Digits = '1234567890'
+export const Alphanumeric = Alpha + Digits
+
diff --git a/src/fn.js b/src/fn.js
new file mode 100644
index 0000000..82dbc88
--- /dev/null
+++ b/src/fn.js
@@ -0,0 +1,99 @@
+import { ParseError } from './state.js'
+import { Iter } from './iter.js'
+import { err, ok } from '../vendor/kojima/src/index.js'
+import { curry } from '../vendor/izuna/src/index.js'
+
+/** @import { ParserState } from './state.js' */
+
+/**
+ * @param {...any} values
+ */
+export const Tuple = (...values) => Object.freeze(values)
+
+/**
+ * @template T
+ * @param {Iterator | Iterable} iterable
+ * @param {number} [n=2]
+ */
+function tee(iterable, n = 2) {
+  const iterator = Iter.from(iterable)
+
+  /**
+   * @param {{ next: any, value: T }} current
+   */
+  function* gen(current) {
+    while (true) {
+      if (!current.next) {
+        const { done, value } = iterator.next()
+        if (done) return
+        current.next = { value }
+      }
+
+      current = current.next
+      yield current.value
+    }
+  }
+
+  return Array(n).fill({}).map(gen)
+}
+
+/**
+ * @param {import('./state.js').ParserState} state
+ */
+export const fork = ([tokens, state]) => {
+  const [a, b] = tee(state)
+  return Tuple(
+    Tuple(tokens.slice(), a),
+    Tuple(tokens.slice(), b),
+  )
+}
+
+/**
+ * @template T
+ * @param {T | T[]} v
+ * @param {ParserState} state
+ */
+export const succeed = (v, [x, y]) => ok(Tuple(x.concat(v), y))
+
+/**
+ * @param {string} msg
+ * @param {ParserState} state
+ * @param {Error} [e]
+ */
+export const fail = (msg, state, e = undefined) => err(new ParseError(msg, state, e))
+
+/**
+ * @template T
+ * @param {number & keyof T} n
+ * @param {T[]} iter
+ */
+export const nth = (n, iter) => iter[n]
+
+/**
+ * @param {ParserState} state
+ */
+export const next = state => state[1].next().value
+
+/**
+ * @template T
+ * @param {T[]} a
+ * @param {T[]} b
+ */
+export const diff = (a, b) => b.slice(a.length)
+
+export const join = curry(
+  /**
+   * @param {string} delim
+   * @param {string[]} val
+   */
+  (delim, val) => val.join(delim)
+)
+
+export const mapStr = curry(
+  /**
+   * @param {(...args: any[]) => any} fn
+   * @param {string} str
+   */
+  (fn, str) => Array.from(str).map(v => fn(v))
+)
+
diff --git a/src/index.js b/src/index.js
index 7236d68..a797e01 100644
--- a/src/index.js
+++ b/src/index.js
@@ -1,2 +1,6 @@
-export * from './parser.js'
+export * from './char.js'
+export * from './combinator.js'
+export * from './cond.js'
+export * from './seq.js'
+export * from './state.js'
 
diff --git a/src/iter.js b/src/iter.js
new file mode 100644
index 0000000..25e5e05
--- /dev/null
+++ b/src/iter.js
@@ -0,0 +1,406 @@
+/**
+ * @template T
+ * @implements Iterator
+ * @implements Iterable
+ */
+export class Iter {
+  _iterator
+
+  /**
+   * @param {Iterator} iterator
+   */
+  constructor(iterator) {
+    this._iterator = iterator
+  }
+
+  /**
+   * @template T
+   * @param {any} value
+   * @returns {value is Iterable}
+   */
+  static _isIterable(value) {
+    return value != null
+      && typeof value[Symbol.iterator] === 'function'
+  }
+
+  /**
+   * @template T
+   * @param {T} value
+   */
+  static from(value) {
+    if (value instanceof Iter) {
+      return value
+    }
+
+    if (Iter._isIterable(value)) {
+      const iterator = value[Symbol.iterator]()
+
+      if (iterator instanceof Iter) {
+        return iterator
+      }
+
+      return new Iter(iterator)
+    }
+
+    throw new TypeError('object is not an iterator')
+  }
+
+  /**
+   * @param {any} [value]
+   */
+  next(value) {
+    return this._iterator.next(value)
+  }
+
+  /**
+   * @param {any} [value]
+   */
+  return(value) {
+    // `return` is optional in the iterator protocol (array iterators lack it)
+    return this._iterator.return?.(value) ?? { value, done: true }
+  }
+
+  /**
+   * @param {any} err
+   */
+  throw(err) {
+    // @ts-ignore
+    return this._iterator.throw(err)
+  }
+
+  /**
+   * @param {number} limit
+   */
+  drop(limit) {
+    return new DropIter(this, limit)
+  }
+
+  /**
+   * @param {(value: T, index: number) => boolean} callbackFn
+   */
+  every(callbackFn) {
+    let next = this.next()
+    let index = 0
+    let result = true
+
+    while (!next.done) {
+      if (!callbackFn(next.value, index)) {
+        result = false
+        break
+      }
+
+      next = this.next()
+      index += 1
+    }
+
+    this.return()
+    return result
+  }
+
+  /**
+   * @param {(value: T, index: number) => boolean} callbackFn
+   */
+  filter(callbackFn) {
+    return new FilterIter(this, callbackFn)
+  }
+
+  /**
+   * @param {(value: T, index: number) => boolean} callbackFn
+   */
+  find(callbackFn) {
+    let next = this.next()
+    let index = 0
+
+    while (!next.done) {
+      if (callbackFn(next.value, index)) {
+        this.return()
+        return next.value
+      }
+
+      next = this.next()
+      index += 1
+    }
+  }
+
+  /**
+   * @param {(value: T, index: number) => U} callbackFn
+   */
+  flatMap(callbackFn) {
+    return new FlatMapIter(this, callbackFn)
+  }
+
+  /**
+   * @param {(value: T, index: number) => void} callbackFn
+   */
+  forEach(callbackFn) {
+    let next = this.next()
+    let index = 0
+
+    while (!next.done) {
+      callbackFn(next.value, index)
+      next = this.next()
+      index += 1
+    }
+  }
+
+  /**
+   * @param {(value: T, index: number) => U} callbackFn
+   */
+  map(callbackFn) {
+    return new MapIter(this, callbackFn)
+  }
+
+  /**
+   * @template U
+   * @param {(accumulator: U, value: T, index: number) => U} callbackFn
+   * @param {U} init
+   */
+  reduce(callbackFn, init) {
+    let next = this.next()
+    let index = 0
+    let acc = init
+
+    while (!next.done) {
+      acc = callbackFn(acc, next.value, index)
+
+      next = this.next()
+      index += 1
+    }
+
+    this.return()
+    return acc
+  }
+
+  /**
+   * @param {(value: T, index: number) => boolean} callbackFn
+   */
+  some(callbackFn) {
+    let next = this.next()
+    let index = 0
+    let result = false
+
+    while (!next.done) {
+      if (callbackFn(next.value, index)) {
+        result = true
+        break
+      }
+
+      next = this.next()
+      index += 1
+    }
+
+    this.return()
+    return result
+  }
+
+  /**
+   * @param {number} limit
+   */
+  take(limit) {
+    return new TakeIter(this, limit)
+  }
+
+  /**
+   * @returns {T[]}
+   */
+  toArray() {
+    /** @type {T[]} */
+    const result = []
+
+    for (const item of this) {
+      result.push(item)
+    }
+
+    return result
+  }
+
+  [Symbol.iterator]() {
+    return this
+  }
+}
+
+/**
+ * @template T
+ * @extends Iter
+ */
+class DropIter extends Iter {
+  _limit
+
+  /**
+   * @param {Iterator} iterator
+   * @param {number} limit
+   */
+  constructor(iterator, limit) {
+    super(iterator)
+    this._limit = limit
+  }
+
+  /**
+   * @param {any} value
+   */
+  next(value) {
+    // consume the skip budget so the drop happens only once
+    for (; this._limit > 0; this._limit -= 1) {
+      const next = super.next(value)
+      if (next.done) {
+        return next
+      }
+    }
+
+    return super.next(value)
+  }
+}
+
+/**
+ * @template T
+ * @extends Iter
+ */
+class FilterIter extends Iter {
+  _filter
+  _index = 0
+
+  /**
+   * @param {Iterator} iterator
+   * @param {(value: T, index: number) => boolean} callbackFn
+   */
+  constructor(iterator, callbackFn) {
+    super(iterator)
+    this._filter = callbackFn
+  }
+
+  /**
+   * @param {any} [value]
+   * @returns {IteratorResult}
+   */
+  next(value) {
+    let next = super.next(value)
+
+    while (!next.done && !this._filter(next.value, this._index++)) {
+      // the index advances for every source element, kept or rejected
+      next = super.next(value)
+    }
+
+    return next
+  }
+}
+
+/**
+ * @template T
+ * @extends Iter
+ */
+class FlatMapIter extends Iter {
+  _flatMap
+  _index = 0
+
+  /** @type {Iterator | undefined} */
+  _inner = undefined
+
+  /**
+   * @param {Iterator} iterator
+   * @param {(value: T, index: number) => Iterator | Iterable} callbackFn
+   */
+  constructor(iterator, callbackFn) {
+    super(iterator)
+    this._flatMap = callbackFn
+  }
+
+  /**
+   * @param {any} value
+   * @returns {IteratorResult}
+   */
+  next(value) {
+    if (this._inner) {
+      const innerResult = this._inner.next(value)
+
+      if (!innerResult.done) {
+        // `_index` counts outer elements only, so inner values don't move it
+        return { value: innerResult.value, done: false }
+      }
+
+      this._inner = undefined
+    }
+
+    const outerResult = super.next(value)
+    if (outerResult.done) {
+      return { value: undefined, done: true }
+    }
+
+    const nextIterable = this._flatMap(outerResult.value, this._index++)
+    if (Iter._isIterable(nextIterable)) {
+      this._inner = Iter.from(nextIterable)
+      return this.next(value)
+    } else {
+      throw new TypeError('value is not an iterator')
+    }
+  }
+}
+
+/**
+ * @template T
+ * @extends Iter
+ */
+class MapIter extends Iter {
+  _map
+  _index = 0
+
+  /**
+   * @param {Iterator} iterator
+   * @param {(value: T, index: number) => U} callbackFn
+   */
+  constructor(iterator, callbackFn) {
+    super(iterator)
+    this._map = callbackFn
+  }
+
+  /** @param {any} value */
+  next(value) {
+    let next = super.next(value)
+
+    if (next.done) {
+      return next
+    }
+
+    const result = {
+      done: false,
+      value: this._map(next.value, this._index)
+    }
+
+    this._index += 1
+    return result
+  }
+}
+
+
+/**
+ * @template T
+ * @extends Iter
+ */
+class TakeIter extends Iter {
+  _limit
+
+  /**
+   * @param {Iterator} iterator
+   * @param {number} limit
+   */
+  constructor(iterator, limit) {
+    super(iterator)
+    this._limit = limit
+  }
+
+  /**
+   * @param {any} value
+   * @returns {IteratorResult}
+   */
+  next(value) {
+    if (this._limit > 0) {
+      const next = super.next(value)
+      if (!next.done) {
+        this._limit -= 1
+      }
+      return next
+    }
+
+    return { value: undefined, done: true }
+  }
+}
+
diff --git a/src/parser.js b/src/parser.js
deleted file mode 100644
index eca5eb8..0000000
--- a/src/parser.js
+++ /dev/null
@@ -1,126 +0,0 @@
-import { Ok, Err, curry } from '../vendor/kojima/src/index.js'
-
-export class ParseError extends Error {
-  constructor(message, state, source) {
-    super(message)
-    this.state = state
-    this.source = source
-  }
-}
-
-const Tuple = (...values) => Object.freeze(values)
-const State = value => Tuple([], Iterator.from(value))
-
-const LowerAlpha = 'abcdefghijklmnopqrstuvwxyz'
-const UpperAlpha = LowerAlpha.toUpperCase()
-const Alpha = LowerAlpha + UpperAlpha
-const Digits = '1234567890'
-const Alphanumeric = Alpha + Digits
-
-
-const tee = (iterator, n = 2) => {
-  iterator = Iterator.from(iterator)
-
-  function* gen(current) {
-    while (true) {
-      if (!current.next) {
-        const { done, value } = iterator.next()
-        if (done) { return }
-        current.next = { value }
-      }
-      current = current.next
-      yield current.value
-    }
-  }
-
-  return Array(n).fill({}).map(gen)
-}
-
-const fork = ([tokens, state]) => {
-  const [a, b] = tee(state)
-  return Tuple(
-    Tuple(tokens.slice(), a),
-    Tuple(tokens.slice(), b),
-  )
-}
-
-export const succeed = (v, [x, y]) => Ok(Tuple(x.concat(v), y))
-export const fail = (msg, state, err = undefined) => Err(new ParseError(msg, state, err))
-const nth = (n, iter) => iter[n]
-const next = state => nth(1, state).next().value
-const diff = (a, b) => b.slice(-Math.max(0, b.length - a.length))
-const join = curry((delim, val) => val.join(delim))
-const mapStr = curry((fn, str) => Array.from(str).map(v => fn(v)))
-
-export const any = (...parsers) => state => {
-  for (const parser of parsers) {
-    const [original, clone] = fork(state)
-    const result = parser(clone)
-    if (result.isOk) {
-      return result
-    }
-  }
-
-  return fail('no matching parsers', state)
-}
-
-export const anyOf = curry((str, state) => (
-  any(...mapStr(char, str))(state)
-))
-
-export const seq = (...parsers) => state => {
-  let acc = Ok(state)
-
-  for (const parser of parsers) {
-    if (acc.isOk) {
-      acc = acc.bind(parser)
-    } else {
-      break
-    }
-  }
-
-  return acc
-}
-
-export const map = curry((fn, parser, state) => {
-  return parser(state).bind(result => {
-    try {
-      const parsed = diff(state[0], result[0])
-      const backtrack = Tuple(state[0], result[1])
-      return succeed(fn(parsed), backtrack)
-    } catch (e) {
-      return fail('failed to map', state, e)
-    }
-  })
-})
-
-export const char = curry((ch, state) => (
-  next(state) === ch ? succeed(ch, state) : fail(`could not parse ${ch} `, state)
-))
-
-export const anyChar = state => {
-  const ch = next(state)
-  return !!ch ? succeed(ch, state) : fail(`could not parse ${ch}`, state)
-}
-
-export const str = curry((str, state) => (
-  map(
-    join(''),
-    seq(...mapStr(char, str))
-  )(state)
-))
-
-export const digit = anyOf(Digits)
-export const lowerAlpha = anyOf(LowerAlpha)
-export const upperAlpha = anyOf(UpperAlpha)
-export const alpha = anyOf(Alpha)
-export const alphanumeric = anyOf(Alphanumeric)
-
-export const maybe = curry((parser, state) => {
-  const [original, clone] = fork(state)
-  const result = parser(clone)
-  return result.isOk ? result : succeed([], original)
-})
-
-export const parse = curry((parser, input) => parser(State(input)))
-
diff --git a/src/seq.js b/src/seq.js
new file mode 100644
index 0000000..1a8c998
--- /dev/null
+++ b/src/seq.js
@@ -0,0 +1,55 @@
+import { diff, fail, succeed, Tuple } from './fn.js'
+import { ok } from '../vendor/kojima/src/index.js'
+import { curry } from '../vendor/izuna/src/curry.js'
+
+/** @import { ParseError, ParserState } from './state.js' */
+/** @import { Result } from '../vendor/kojima/src/index.js' */
+
+/**
+ * @typedef {(value: any) => Result} Parser
+ */
+
+/**
+ * @param {...Parser} parsers
+ */
+export const seq = (...parsers) =>
+  /** @param {ParserState} state */
+  state => {
+    let acc = ok(state)
+
+    for (const parser of parsers) {
+      if (acc.isOk()) {
+        acc = acc.bind(parser)
+      } else {
+        break
+      }
+    }
+
+    return acc
+  }
+
+export const map = curry(
+  /**
+   * @param {(...args: any[]) => any} fn
+   * @param {Parser} parser
+   * @param {ParserState} state
+   */
+  (fn, parser, state) => {
+    return parser(state).bind(result => {
+      try {
+        // `result` is the state produced by `parser`; keep only the
+        // tokens it appended on top of the incoming token list
+        const parsed = diff(state[0], result[0])
+
+        // restart from the incoming tokens but keep the advanced input,
+        // so `succeed` appends just the mapped value
+        const backtrack = Tuple(state[0], result[1])
+
+        // NOTE(review): assumes bind passes the unwrapped state, as seq does
+        return succeed(fn(parsed), backtrack)
+      } catch (e) {
+        return fail('failed to map', state, e)
+      }
+    })
+  })
+
diff --git a/src/state.js b/src/state.js
new file mode 100644
index 0000000..071fcbe
--- /dev/null
+++ b/src/state.js
@@ -0,0 +1,23 @@
+import { Iter } from './iter.js'
+/**
+ * @typedef {Readonly<[any[], Iterator]>} ParserState
+ */
+
+export class ParseError extends Error {
+  /**
+   * @param {string} message
+   * @param {ParserState} state
+   * @param {Error} [cause]
+   */
+  constructor(message, state, cause) {
+    super(message, { cause })
+    this.state = state
+  }
+}
+
+/**
+ * @param {any} value
+ * @returns {ParserState}
+ */
+export const State = value => Object.freeze([[], Iter.from(value)])
+
diff --git a/vendor/izuna b/vendor/izuna
new file mode 160000
index 0000000..aa70427
--- /dev/null
+++ b/vendor/izuna
@@ -0,0 +1 @@
+Subproject commit aa70427c8c349bbfe4576cba878f5b44859007d4
diff --git a/vendor/kojima b/vendor/kojima
index 1402219..d661524 160000
--- a/vendor/kojima
+++ b/vendor/kojima
@@ -1 +1 @@
-Subproject commit 1402219aad22f55c39ac448bff372474a17f5c00
+Subproject commit d6615248572d2e5c16661d8aab0650ae28aeb6c2