X-Git-Url: http://www.iramuteq.org/git?a=blobdiff_plain;f=corpus.py;fp=corpus.py;h=e6b0bf2345dbeed70bd26eb10ae4ec2fff90d899;hb=1301403740fe3e9487f67a07870796f9e3dfb1f9;hp=7ab0ebbc9bbcdcb91c647511cab345ca754323c1;hpb=14ef9adbd80ca7e8ed9ee3f108858fa7d7c24d5b;p=iramuteq diff --git a/corpus.py b/corpus.py index 7ab0ebb..e6b0bf2 100644 --- a/corpus.py +++ b/corpus.py @@ -20,6 +20,9 @@ from operator import itemgetter from uuid import uuid4 import datetime from copy import copy +#------test spacy------------ +#import spacy +#nlp = spacy.load("fr_core_news_lg") #------------------------------------ # import des fichiers du projet @@ -1546,6 +1549,7 @@ class BuildSubCorpus(BuildCorpus): class BuildFromAlceste(BuildCorpus) : def read_corpus(self, infile) : + if self.dlg is not None : self.dlg.Pulse('textes : 0 - segments : 0') self.limitshow = 0 @@ -1565,6 +1569,8 @@ class BuildFromAlceste(BuildCorpus) : if self.testuci(line) : iduci += 1 if txt != [] : + #doc = nlp(' '.join(txt)) + #print([[word, word.pos_, word.lemma_] for word in doc]) iduce, idpara = self.treattxt(txt, iduce, idpara, iduci - 1) txt = [] self.corpus.ucis.append(Uci(iduci, line))