X-Git-Url: http://www.iramuteq.org/git?a=blobdiff_plain;f=textsimi.py;h=08c9f6304acd73769c0f7c09ebeb92aec778e0f7;hb=refs%2Fheads%2F3.0;hp=dd5349cafed394e59eeb9b9c9594b977e1be3b9a;hpb=b5603a452507554481981b32147a60c17151cade;p=iramuteq diff --git a/textsimi.py b/textsimi.py index dd5349c..08c9f63 100644 --- a/textsimi.py +++ b/textsimi.py @@ -1,32 +1,43 @@ # -*- coding: utf-8 -*- #Author: Pierre Ratinaud -#Copyright (c) 2008-2013 Pierre Ratinaud +#Copyright (c) 2008-2020 Pierre Ratinaud +#modification pour python 3 : Laurent Mérat, 6x7 - mai 2020 #License: GNU/GPL -from chemins import ffr, simipath -#from corpus import Corpus +#------------------------------------ +# import des modules python +#------------------------------------ import os +from copy import copy +from operator import itemgetter +import codecs +import logging + +#------------------------------------ +# import des modules wx +#------------------------------------ +import wx + +#------------------------------------ +# import des fichiers du projet +#------------------------------------ +from chemins import ffr, simipath from analysetxt import AnalyseText -#from ConfigParser import RawConfigParser -#from guifunct import getPage, getCorpus from guifunct import PrepSimi -from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste -#from tableau import Tableau -#from tabsimi import DoSimi -from PrintRScript import PrintSimiScript -import wx -from copy import copy +from functions import indices_simi, progressbar, treat_var_mod, read_list_file, print_liste, DoConf, exec_rcode, check_Rresult +from PrintRScript import PrintSimiScript -import logging log = logging.getLogger('iramuteq.textsimi') + class SimiTxt(AnalyseText): + def doanalyse(self) : self.parametres['type'] = 'simitxt' self.pathout.basefiles(simipath) self.indices = indices_simi - if self.dlg : + if self.dlg : # quel est le lien ??? self.makesimiparam() #FIXME self.actives = self.corpus.make_actives_limit(3) @@ -38,13 +49,18 @@ class SimiTxt(AnalyseText): self.stars = copy(self.listet) self.parametres['stars'] = copy(self.listet) self.parametres['sfromchi'] = False - self.dlg.Destroy() prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol) if prep.val == wx.ID_OK : continu = True self.parametres = prep.parametres - self.dlg = progressbar(self.ira, 4) +# self.dlg = progressbar(self.ira, 4) + else : + return False else : + order_actives = [[i, act, self.corpus.getlemeff(act)] for i, act in enumerate(self.actives)] + order_actives = sorted(order_actives, key=itemgetter(2), reverse = True) + with open(self.pathout['selected.csv'], 'w', encoding='utf8') as f : + f.write('\n'.join([repr(order_actives[val][0]) for val in self.parametres['selected']])) continu = True if continu : self.makefiles() @@ -113,18 +129,21 @@ class SimiTxt(AnalyseText): self.parametres['nbactives'] = len(self.actives) self.parametres['fromprof'] = False self.corpus.make_and_write_sparse_matrix_from_uces(self.actives, self.pathout['mat01.csv'], self.pathout['listeuce1.csv']) - with open(self.pathout['actives.csv'], 'w') as f : - f.write('\n'.join(self.actives).encode(self.ira.syscoding)) + with open(self.pathout['actives.csv'], 'w', encoding='utf8') as f : + f.write('\n'.join(self.actives)) + class SimiFromCluster(SimiTxt) : + def __init__(self, ira, corpus, actives, lfreq, lchi, numcluster, parametres = None, dlg = False) : self.actives = actives self.numcluster = numcluster self.lfreq = lfreq self.lchi = lchi parametres['name'] = 'simi_classe_%i' % (numcluster + 1) - SimiTxt.__init__(self, ira, corpus, parametres, dlg, lemdial = False) - + dlg.Destroy() + SimiTxt.__init__(self, ira, corpus, parametres, dlg=True, lemdial = False) + def preferences(self) : return self.parametres @@ -139,10 +158,9 @@ class SimiFromCluster(SimiTxt) : dictcol = dict([[i, [act, self.corpus.getlemclustereff(act, self.numcluster)]] for i, act in enumerate(self.actives)]) continu = True if self.dlg : - #self.listet = self.corpus.make_etoiles() - #self.listet.sort() - self.stars = []#copy(self.listet) - self.parametres['stars'] = 0#copy(self.listet) +# self.dlg.Destroy() + self.stars = [] + self.parametres['stars'] = 0 self.parametres['sfromchi'] = 1 prep = PrepSimi(self.ira, self, self.parametres, self.pathout['selected.csv'], self.actives, indices_simi, wordlist=dictcol) if prep.val == wx.ID_OK : @@ -151,6 +169,7 @@ class SimiFromCluster(SimiTxt) : else : continu = False if continu : + self.dlg = progressbar(self.parent, 3) self.makefiles() self.parametres['type'] = 'clustersimitxt' script = PrintSimiScript(self) @@ -169,6 +188,7 @@ class SimiFromCluster(SimiTxt) : else : graph_simi = [[os.path.basename(fileout), script.txtgraph]] print_liste(self.pathout['liste_graph'], graph_simi) + self.dlg.Destroy() else : return False @@ -177,10 +197,9 @@ class SimiFromCluster(SimiTxt) : self.parametres['nbactives'] = len(self.actives) self.parametres['fromprof'] = True self.corpus.make_and_write_sparse_matrix_from_classe(self.actives, self.corpus.lc[self.numcluster], self.pathout['mat01.csv']) - with open(self.pathout['actives.csv'], 'w') as f : - f.write('\n'.join(self.actives).encode(self.ira.syscoding)) - with open(self.pathout['actives_nb.csv'], 'w') as f : - f.write('\n'.join([`val` for val in self.lfreq])) - with open(self.pathout['actives_chi.csv'], 'w') as f : - f.write('\n'.join([`val` for val in self.lchi])) - + with open(self.pathout['actives.csv'], 'w', encoding='utf8') as f : + f.write('\n'.join(self.actives)) + with open(self.pathout['actives_nb.csv'], 'w', encoding='utf8') as f : + f.write('\n'.join([repr(val) for val in self.lfreq])) + with open(self.pathout['actives_chi.csv'], 'w', encoding='utf8') as f : + f.write('\n'.join([repr(val) for val in self.lchi]))