0

I want to join the two parts of a word that are separated by an asterisk in a list of words. The code I am trying is:

import nltk
from nltk.tokenize import word_tokenize
import re

# Sample token list; the "*" item sits between the two pieces of a split word.
words = ['les','engage', '*', 'ment', 'de','la']

# Read the dictionary file, lower-case its contents and tokenize them into a list.
with open ('Fr-dictionary.txt') as fr:
    dic = word_tokenize(fr.read().lower())

# l collects every original token plus each merged word that is found in dic.
l=[ ]
# errors collects the tokens (left piece, "*", right piece) that a merged word replaces.
errors=[ ]

for n,word in enumerate (words):
    l.append(word)
    if word == "*":
        # NOTE(review): words[n-1] wraps around to the last item when "*" is the
        # first token, and words[n+1] raises IndexError when "*" is the last one.
        print(words[n-1], words[n+1])
        exp = words[n-1] + words[n+1]
        # Only accept the merged word when it appears in the dictionary token list.
        if exp in dic:  
            # The merged word is appended right after "*", not in place of the pieces.
            l.append(exp)
            errors.append(words[n-1])
            errors.append("*")
            errors.append(words[n+1])
        else:
            continue

print(l)
print(errors)


# Sets are unordered: converting to frozenset discards the original word order
# (and collapses duplicate tokens), which is why the final list comes out shuffled.
l=frozenset(l)
errors=frozenset(errors)

# Keep the items of l that do not occur in errors.
c=l.difference(errors)

print(list(c)) 

My output is:

['la', 'les', 'de', 'engagement'] 

But my desired output has to be in the same order as the original list, without the asterisk:

['les','engagement', 'de','la']

Is there any other way to get the desired output?

Nadia Santos
  • 63
  • 1
  • 8

1 Answer

0

Try this:

    # Merge, in place, the words on either side of every "*" marker in `words`,
    # e.g. ['les', 'engage', '*', 'ment', 'de', 'la'] -> ['les', 'engagement', 'de', 'la'].
    while "*" in words:
        index = words.index("*")
        if index == 0 or index == len(words) - 1:
            # A marker at either end has no neighbour on one side; drop it
            # instead of indexing past the list or wrapping around to the other end.
            words.pop(index)
            continue
        # Replace the three items [left, "*", right] with the single joined word.
        # Writing it back at index - 1 keeps the result where the left piece was,
        # so the order of the remaining words is preserved.
        words[index - 1:index + 2] = [words[index - 1] + words[index + 1]]