Added random word generator
This commit is contained in:
62
random_word_generator/process_mhyph.py
Normal file
62
random_word_generator/process_mhyph.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from collections import defaultdict
|
||||
import json
|
||||
|
||||
# Input word list, read in binary mode by the script; each line is split on
# the byte 0xA5 to obtain the syllables of one entry.
# NOTE(review): presumably the Moby Hyphenator list -- confirm the source.
MHYPH_FILENAME = 'mhyph.txt'
|
||||
# Destination file for the JSON document the script produces (per-syllable
# position counts, per-position syllable lists and the '__meta__' totals).
OUTPUT_FILENAME = 'syllables.json'
|
||||
|
||||
def def_value():
    """Return a fresh, zeroed count vector with one slot per syllable position.

    Used as the ``defaultdict`` factory by the script, so every newly seen
    syllable starts with a count of 0 at each position. A new list is built
    on every call, which keeps the vectors of different syllables independent.
    """
    # MHYPH has a 19 syllable long word
    return [0] * 19
|
||||
|
||||
def count_syllables(lines, new_vector):
    """Count how often each syllable occurs at each position within a word.

    Args:
        lines: iterable of ``bytes`` lines; syllables within a line are
            separated by the byte 0xA5.
        new_vector: zero-argument callable returning a fresh list of zero
            counts, one slot per position (used as the ``defaultdict``
            factory).

    Returns:
        A ``defaultdict`` mapping each syllable (``str``) to its list of
        per-position counts.

    Raises:
        IndexError: if a line has an accepted syllable at a position beyond
            the length of the vectors produced by ``new_vector``.
    """
    counts = defaultdict(new_vector)
    for line in lines:
        for position, syllable in enumerate(line.rstrip().split(b'\xa5')):
            syllable = syllable.lower()
            # After lowercasing, bytes.isalpha() is true only for non-empty
            # runs of ASCII a-z. Anything else (spaces, hyphens, apostrophes,
            # non-ASCII bytes, or an empty piece from a blank line) is
            # skipped, but its position is still consumed by enumerate().
            if not syllable.isalpha():
                continue
            counts[syllable.decode('utf-8')][position] += 1
    return counts


def group_by_position(counts, positions):
    """List, for every position, each syllable repeated once per occurrence.

    Args:
        counts: mapping of syllable -> list of per-position counts.
        positions: number of positions (length of each count list).

    Returns:
        A list of ``positions`` independent lists; entry ``j`` holds every
        syllable repeated as many times as it was counted at position ``j``,
        in the iteration order of ``counts``.
    """
    # Build each inner list separately: ``[[]] * positions`` would repeat one
    # shared list object, so extending any position would extend all of them.
    by_position = [[] for _ in range(positions)]
    for syllable, per_position in counts.items():
        for position, count in enumerate(per_position):
            by_position[position].extend([syllable] * count)
    return by_position


def total_per_position(counts, positions):
    """Return the total number of counted syllables at each position."""
    totals = [0] * positions
    for per_position in counts.values():
        for position, count in enumerate(per_position):
            totals[position] += count
    return totals


if __name__ == '__main__':
    with open(MHYPH_FILENAME, 'rb') as f:
        sd = count_syllables(f, def_value)

    # Derive the number of positions from the factory so the vector length
    # is defined in exactly one place.
    positions = len(def_value())

    dictionary = {}
    dictionary['syllables and their frequencies per position'] = sd

    # Unsure quite the best term to use here... but these are all the
    # syllables stored according to the position they appear in the word,
    # repeated once for each time they appear there - useful (hopefully) for
    # making some vaguely real sounding words because it'll kind of represent
    # where these syllables are used in real words. Or it might not!
    dictionary['positions and their syllables'] = group_by_position(
        sd, positions)

    # Some "meta" values: totals per position and the overall total.
    totals = total_per_position(sd, positions)
    meta = {}
    meta['syllable totals'] = totals
    meta['total syllables'] = sum(totals)
    dictionary['__meta__'] = meta
    print(dictionary['__meta__']['syllable totals'])

    with open(OUTPUT_FILENAME, 'w') as f:
        json.dump(dictionary, f, indent=4)
|
||||
Reference in New Issue
Block a user