Added random word generator

This commit is contained in:
2024-05-07 18:25:22 +01:00
parent c46405efce
commit 599b2c8365
4 changed files with 269 additions and 0 deletions

View File

@@ -0,0 +1,62 @@
from collections import defaultdict
import json
# Input file: opened in binary mode and read line by line; each line is
# split into syllables on the byte 0xA5.
MHYPH_FILENAME = 'mhyph.txt'
# Output file: the collected syllable statistics are written here as JSON.
OUTPUT_FILENAME = 'syllables.json'
def def_value():
    """Return a fresh list of 19 zeroed per-position counters."""
    # 19 slots: per the original note, the source word list contains a
    # word that is 19 syllables long.
    return [0 for _ in range(19)]
def is_plain_syllable(syllable):
    """Return True if *syllable* (bytes) is non-empty and only ASCII a-z."""
    # 97..122 are the byte values of b'a'..b'z'.
    return bool(syllable) and all(97 <= byte <= 122 for byte in syllable)


def tally_syllables(lines, counts):
    """Count every plain syllable in *lines* by its position in the line.

    lines: iterable of bytes objects; each one is stripped of trailing
        whitespace and split into syllables on the byte 0xA5.
    counts: mapping of syllable (str) -> list of per-position counters.
        It is updated in place and must create missing entries itself
        (e.g. a defaultdict).

    Raises IndexError if a syllable sits at a position beyond the length
    of its counter list.
    """
    for line in lines:
        for position, raw in enumerate(line.rstrip().split(b'\xa5')):
            syllable = raw.lower()
            # Empty pieces and pieces containing anything other than a-z
            # (spaces, hyphens, accents, ...) are not counted; the
            # position index still advances past them.
            if not is_plain_syllable(syllable):
                continue
            counts[syllable.decode('utf-8')][position] += 1


def syllables_by_position(counts, positions):
    """Return one list per position holding each syllable once per use.

    A syllable seen N times at a position appears N times in that
    position's list, so a uniform random pick from the list follows the
    observed frequencies.
    """
    # Build independent lists: [[]] * positions would repeat one shared
    # list object, so extending any entry would extend all of them.
    by_position = [[] for _ in range(positions)]
    for syllable, per_position in counts.items():
        for position, count in enumerate(per_position):
            by_position[position].extend([syllable] * count)
    return by_position


def position_totals(counts, positions):
    """Return the total number of counted syllables at each position."""
    totals = [0] * positions
    for per_position in counts.values():
        for position, count in enumerate(per_position):
            totals[position] += count
    return totals


if __name__ == '__main__':
    sd = defaultdict(def_value)
    with open(MHYPH_FILENAME, 'rb') as f:
        tally_syllables(f, sd)

    # Number of tracked positions, taken from the default counter list so
    # every structure below has the same length as the count vectors.
    positions = len(def_value())

    dictionary = {}
    dictionary['syllables and their frequencies per position'] = sd
    # All syllables grouped by the position they appear at in a word,
    # repeated once per occurrence -- intended to help build vaguely
    # real-sounding words by mirroring where syllables are actually used.
    dictionary['positions and their syllables'] = (
        syllables_by_position(sd, positions)
    )

    # "Meta" values: totals per position and the overall total.
    totals = position_totals(sd, positions)
    meta = {}
    meta['syllable totals'] = totals
    meta['total syllables'] = sum(totals)
    dictionary['__meta__'] = meta
    print(dictionary['__meta__']['syllable totals'])

    with open(OUTPUT_FILENAME, 'w', encoding='utf-8') as f:
        json.dump(dictionary, f, indent=4)