X-Git-Url: http://www.iramuteq.org/git?a=blobdiff_plain;f=corpus.py;h=489d4f18de1c71b1ce80f3699985b5cfefd62b21;hb=dac789bedd0b3867a7749b1d71bc7868c44642f7;hp=5fe448aee4570fcfbcd69d9efabf7c79223d8aff;hpb=8cac79f863647b171a7de26478fa85dc358b80e4;p=iramuteq diff --git a/corpus.py b/corpus.py index 5fe448a..489d4f1 100644 --- a/corpus.py +++ b/corpus.py @@ -20,6 +20,9 @@ from operator import itemgetter from uuid import uuid4 import datetime from copy import copy +#------test spacy------------ +#import spacy +#nlp = spacy.load("fr_core_news_lg") #------------------------------------ # import des fichiers du projet @@ -1546,6 +1549,7 @@ class BuildSubCorpus(BuildCorpus): class BuildFromAlceste(BuildCorpus) : def read_corpus(self, infile) : + if self.dlg is not None : self.dlg.Pulse('textes : 0 - segments : 0') self.limitshow = 0 @@ -1565,6 +1569,8 @@ class BuildFromAlceste(BuildCorpus) : if self.testuci(line) : iduci += 1 if txt != [] : + #doc = nlp(' '.join(txt)) + #print([[word, word.pos_, word.lemma_] for word in doc]) iduce, idpara = self.treattxt(txt, iduce, idpara, iduci - 1) txt = [] self.corpus.ucis.append(Uci(iduci, line))