The goal of this code is to list an array of arrays containing object words of the length of it's index, which also contain an array of arrays of subwords that contain subwords of the length of it's index as well. These subwords are words that you can spell with the characters of the word itself. I'm trying to optimize my code so it can process about 81,000 words at a faster rate. The current program runs and will complete the process but it will take several hours to complete. I'm struggling to find where in my code can bested changed to decrease the time needed to run.
This is an example words.txt file.
do
dog
eat
go
god
goo
good
tea
Below is the expected subwords.json output from the txt file above.
[
[],
[],
[
{
"word": "do",
"subwords": [
[],
[],
[]
]
},
{
"word": "go",
"subwords": [
[],
[],
[]
]
}
],
[
{
"word": "dog",
"subwords": [
[],
[],
[
"do",
"go"
],
[
"god"
]
]
},
{
"word": "eat",
"subwords": [
[],
[],
[],
[
"tea"
]
]
},
{
"word": "god",
"subwords": [
[],
[],
[
"do",
"go"
],
[
"dog"
]
]
},
{
"word": "goo",
"subwords": [
[],
[],
[
"go"
],
[]
]
},
{
"word": "tea",
"subwords": [
[],
[],
[],
[
"eat"
]
]
}
]
]
Below is the code that is running, calling the words.txt and outputting the subwords.json
const fs = require('fs')
const totalTalkText = fs.readFileSync('words.txt').toString()
const sortedWords = totalTalkText
.toLowerCase()
.split(/[^a-zA-Z]+/g)
.sort()
.sort((a, b) => {
return a.length - b.length
})
const finalData = new Array()
for (i = 0; i < sortedWords[0].length; i++) {
finalData[i] = []
}
sortedWords.forEach((word, index) => {
const subwordsArray = new Array()
let wordObject = {
word: word
}
if (finalData.indexOf(word.length) === -1) {
finalData[word.length] = new Array()
}
sortedWords.some(subword => {
if (subword.length > word.length) return
if (subwordsArray.indexOf(subword.length) === -1) {
subwordsArray[subword.length] = new Array()
for (i = 0; i < sortedWords[0].length; i++) {
subwordsArray[i] = []
}
}
if (subword !== '' && word !== subword && isSubword(word, subword)) {
subwordsArray[subword.length].push(subword)
}
})
wordObject.subwords = subwordsArray
finalData[word.length].push(wordObject)
//console.log(`${word}: ${index / 814.88}%`) //This is mostly to help me gauge how long the program would take
})
fs.writeFileSync('subwords.json', JSON.stringify([...finalData]))
function isSubword(word, subword) {
let tmpArray = new Array(256)
for (let i = 0; i < 256; i++) tmpArray[i] = 0
for (let i in word) tmpArray[word.charCodeAt(i)] += 1
for (let i in subword) {
tmpArray[subword.charCodeAt(i)] -= 1
if (tmpArray[subword.charCodeAt(i)] < 0) return false
}
return true
}
I have been trying to find a better way to check if a word is a subword in the isSubword function but have had no success. Again, the code should function given a long period of time, I am just trying to get it to run significantly faster. Any help would be greatly appreciated!