1

I want to extract noun-adjective pairs from a sentence. I tried this code:

import stanza

# Build the French Stanza pipeline once; it is reused for every parse below.
nlp = stanza.Pipeline("fr")

# Sample text covering predicative ("est belle"), coordinated ("belle et
# jolie") and attributive ("femme intelligente") adjective constructions.
doc = nlp(
    "La voiture est belle et jolie, et grand. "
    "Le tableau qui est juste en dessous est grand. "
    "La femme intelligente et belle est grande. "
    "Le service est rapide et les plats sont délicieux."
)

def recursive_find_adjs(root, sent):
    """Collect coordinated adjectives found anywhere below ``root``.

    Walks the dependency subtree rooted at ``root`` (every word reachable by
    following ``head`` links back to ``root.id``). At each level, direct
    dependents that are adjectives attached with the ``conj`` relation are
    collected, except those that are themselves the head of a noun.

    Args:
        root: Word whose subtree is searched; only its ``id`` is read.
        sent: Sentence object exposing ``words``; each word must provide
            ``id``, ``head``, ``deprel`` and ``upos``.

    Returns:
        A list of word objects: the matches among ``root``'s direct
        dependents first, followed by the matches found under each
        dependent, in sentence order.
    """
    dependents = [tok for tok in sent.words if tok.head == root.id]

    def governs_noun(candidate):
        # An adjective that heads a noun is skipped -- presumably because it
        # acts as the predicate of that other noun rather than of ours.
        for tok in sent.words:
            if tok.head == candidate.id and tok.upos == "NOUN":
                return True
        return False

    found = []
    for tok in dependents:
        if tok.deprel == "conj" and tok.upos == "ADJ" and not governs_noun(tok):
            found.append(tok)

    # Descend into every dependent (not only the adjectives) so that nested
    # coordinations deeper in the tree are picked up as well.
    for tok in dependents:
        found.extend(recursive_find_adjs(tok, sent))

    return found

# For every parsed sentence, print a dict mapping each noun's surface form to
# the space-joined adjectives that describe it.
# NOTE: the dict is keyed by ``noun.text``, so a noun repeated within one
# sentence keeps only the adjectives of its last occurrence.
for sent in doc.sentences:
    noun_adj_pairs = {}
    for noun in (w for w in sent.words if w.upos == "NOUN"):
        adjs = []

        # Predicative constructions such as "La voiture est belle": the
        # adjective is the head of the noun. A head of 0 marks the sentence
        # root, which has no governor; indexing with ``head - 1`` would then
        # silently wrap around to the last token of the sentence.
        if noun.head > 0:
            cop_root = sent.words[noun.head - 1]
            if cop_root.upos == "ADJ":
                adjs.append(cop_root)
                adjs.extend(recursive_find_adjs(cop_root, sent))

        # Attributive constructions such as "La femme intelligente et belle":
        # the adjectives are dependents of the noun. A noun can have several
        # direct adjectival dependents ("une belle voiture rouge"), so each
        # one is expanded instead of only the first.
        for mod_adj in sent.words:
            if mod_adj.head == noun.id and mod_adj.upos == "ADJ":
                adjs.append(mod_adj)
                adjs.extend(recursive_find_adjs(mod_adj, sent))

        # The same adjective can be reached through more than one path;
        # keep the first occurrence only, preserving discovery order.
        seen_ids = set()
        adj_texts = []
        for adj in adjs:
            if adj.id not in seen_ids:
                seen_ids.add(adj.id)
                adj_texts.append(adj.text)

        if adj_texts:
            noun_adj_pairs[noun.text] = " ".join(adj_texts)

    print(noun_adj_pairs)

It works well, but it does not take into account negation, e.g. "the restaurant n'est pas bien", or the case where we have NOUN + "au" (preposition) + "bien" (adjective).
Is there any solution, please?

0 Answers0