X-Git-Url: http://www.iramuteq.org/git?a=blobdiff_plain;f=corpus.py;fp=corpus.py;h=489d4f18de1c71b1ce80f3699985b5cfefd62b21;hb=af8c3ff85a3c867356cb6353ca0e9bfe414d5b76;hp=5fe448aee4570fcfbcd69d9efabf7c79223d8aff;hpb=978a043350587e050b2eae263bcadbff716854d9;p=iramuteq diff --git a/corpus.py b/corpus.py index 5fe448a..489d4f1 100644 --- a/corpus.py +++ b/corpus.py @@ -20,6 +20,9 @@ from operator import itemgetter from uuid import uuid4 import datetime from copy import copy +#------test spacy------------ +#import spacy +#nlp = spacy.load("fr_core_news_lg") #------------------------------------ # import des fichiers du projet @@ -1546,6 +1549,7 @@ class BuildSubCorpus(BuildCorpus): class BuildFromAlceste(BuildCorpus) : def read_corpus(self, infile) : + if self.dlg is not None : self.dlg.Pulse('textes : 0 - segments : 0') self.limitshow = 0 @@ -1565,6 +1569,8 @@ class BuildFromAlceste(BuildCorpus) : if self.testuci(line) : iduci += 1 if txt != [] : + #doc = nlp(' '.join(txt)) + #print([[word, word.pos_, word.lemma_] for word in doc]) iduce, idpara = self.treattxt(txt, iduce, idpara, iduci - 1) txt = [] self.corpus.ucis.append(Uci(iduci, line))