1 # -*- coding: utf-8 -*-
2 #Author: Pierre Ratinaud
3 #Copyright (c) 2008-2020 Pierre Ratinaud
4 #modification pour python 3 : Laurent Mérat, 6x7 - mai 2020
7 #------------------------------------
8 # import des modules python
9 #------------------------------------
11 from subprocess import Popen, call, PIPE
23 from shutil import copyfile
26 #from dialog import BugDialog
28 from operator import itemgetter
30 #------------------------------------
31 # import des modules wx
32 #------------------------------------
36 #------------------------------------
37 # import des fichiers du projet
38 #------------------------------------
39 from configparser import ConfigParser
# Application-wide logger; handlers/levels are configured elsewhere in the package.
log = logging.getLogger('iramuteq')
# Names of the similarity indices selectable for the "analyse de similitudes";
# these labels are passed through to the R scripts that compute them.
indices_simi = ['cooccurrence' ,'pourcentage de cooccurrence','Russel','Jaccard', 'Kulczynski1', 'Kulczynski2', 'Mountford', 'Fager', 'simple matching', 'Hamman', 'Faith', 'Tanimoto', 'Dice', 'Phi', 'Stiles', 'Michael', 'Mozley', 'Yule', 'Yule2', 'Ochiai', 'Simpson', 'Braun-Blanquet','Chi-squared', 'Phi-squared', 'Tschuprow', 'Cramer', 'Pearson', 'binomial']
def open_folder(folder):
    # Open *folder* in the platform's file manager.
    if sys.platform == "win32":
        # NOTE(review): the win32 branch body and the 'else:' header are
        # elided in this view (presumably os.startfile) -- confirm against
        # the full source; the lines below are the non-Windows path.
        opener ="open" if sys.platform == "darwin" else "xdg-open"
        #call([opener, folder])
        # Unescaped interpolation into a shell command: paths containing
        # spaces or shell metacharacters will break (pre-existing limitation).
        call(["%s %s &" % (opener, folder)], shell=True)
def normpath_win32(path) :
    # Collapse doubled backslashes in a Windows path; on non-win32 platforms
    # the path is returned untouched (returns elided in this view).
    if not sys.platform == 'win32' :  # [early-return body elided in this view]
    while '\\\\' in path :
        path = path.replace('\\\\', '\\')
    if path.startswith('\\') and not path.startswith('\\\\') :  # [body elided:
        # presumably restores the leading '\\' of UNC paths; final return also elided]
    def __init__(self, path = None, encoding = 'utf8'):
        # Tgen (custom word-group) file handler; *path* is the tab-separated
        # storage file, *encoding* the codec used to read it.
        # [other attribute initialisations elided in this view]
        self.encoding = encoding
    def __getitem__(self, key):
        # Dictionary-style access to one tgen entry [body elided in this view].
    def read(self, path = None):
        # Load the tgen file: one entry per line, 'name\tlemma1\tlemma2...'.
        # [default-path handling elided in this view]
        with codecs.open(path, 'r', self.encoding) as f :
            # [read of the file content into `tgen` elided in this view]
        tgen = [line.split('\t') for line in tgen.splitlines()]
        tgen = dict([[line[0], line[1:]] for line in tgen])
        # [assignment of the parsed dict to self.tgen elided in this view]
    def write(self, path = None):
        # Persist self.tgen back to disk, tab-separated, one entry per line.
        # [default-path handling elided in this view]
        with open(path, 'w') as f :
            f.write('\n'.join(['\t'.join([val] + self.tgen[val]) for val in self.tgen]))
    def writetable(self, pathout, tgens, totocc):
        # Write a per-etoile occurrence table of the tgens plus a total row.
        etoiles = list(totocc.keys())
        with open(pathout, 'w') as f :
            line = '\t'.join(['tgens'] + etoiles) + '\n'
            # [f.write(line) and the per-tgen loop header elided in this view]
            line = '\t'.join([t] + [repr(tgens[t][et]) for et in etoiles]) + '\n'
            # pick a name for the total row that does not collide with a tgen
            while totname + repr(i) in tgens :
                # [increment of i elided in this view]
            totname = totname + repr(i)
            line = '\t'.join([totname] + [repr(totocc[et]) for et in etoiles]) + '\n'
            # [final f.write(line) elided in this view]
    def __init__(self, filein, syscoding = 'utf8') :
        # Load the analysis history from the shelve database at *filein*
        # and build uuid-keyed lookup tables over it.
        self.syscoding = syscoding
        # caches of corpora/matrices currently opened in the GUI
        self.openedcorpus = {}
        self.openedmatrix = {}
        # [assignment of self.filein and other initialisations elided in this view]
        d = shelve.open(self.filein)
        self.history = d.get('history', [])
        self.matrix = d.get('matrix', [])
        # uuid -> list-position and uuid -> object maps, rebuilt on every load
        self.ordercorpus = dict([[corpus['uuid'], i] for i, corpus in enumerate(self.history)])
        self.corpus = dict([[corpus['uuid'], corpus] for corpus in self.history])
        self.analyses = dict([[analyse['uuid'], analyse] for corpus in self.history for analyse in corpus.get('analyses', [])])
        self.matrixanalyse = dict([[mat['uuid'], mat] for mat in self.matrix])
        self.ordermatrix = dict([[matrix['uuid'], i] for i, matrix in enumerate(self.matrix)])
        # [d.close() presumably follows -- elided in this view, confirm]
        # [method header elided in this view -- these lines persist the
        # history and matrix lists back to the shelve database]
        d = shelve.open(self.filein)
        d['history'] = self.history
        d['matrix'] = self.matrix
        # [d.close() elided in this view]
    def add(self, analyse) :
        # Register a freshly finished analysis (or a new corpus) in the history.
        log.info('add to history %s' % analyse.get('corpus_name', 'pas un corpus'))
        tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
        if tosave['uuid'] in self.corpus :
            log.info('problem : this uuid is already in history : %s' % tosave['uuid'])
            # [early return elided in this view]
        if analyse.get('corpus', False) :
            # an analysis attached to an existing corpus
            if analyse['uuid'] in self.analyses :
                # [duplicate-analysis handling elided in this view]
            tosave['corpus'] = analyse['corpus']
            tosave['name'] = analyse['name']
            acorpus_uuid = analyse['corpus']
            if acorpus_uuid in self.corpus :
                if 'analyses' in self.history[self.ordercorpus[acorpus_uuid]] :
                    self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave)
                # ['else:' header elided] first analysis for this corpus:
                    self.history[self.ordercorpus[acorpus_uuid]]['analyses'] = [tosave]
            # ['else:' header elided] corpus unknown -> the analysis is orphaned:
                self.orph.append(tosave)
        # ['else:' header elided] the entry is itself a corpus:
            tosave['corpus_name'] = analyse['corpus_name']
            #self.ordercorpus[tosave['uuid']] = len(history)
            #self.corpus[tosave['uuid']] = analyse
            self.history.append(tosave)
        # [index rebuild / save() call elided in this view]
    def addMatrix(self, analyse) :
        # Register a newly imported matrix in the history.
        # [construction of `tosave` from analyse elided in this view]
        #tosave['matrix_name'] = analyse['matrix_name']
        tosave['analyses'] = []
        self.matrix.append(tosave)
        # [index rebuild / save() call elided in this view]
    def addMatrixAnalyse(self, analyse) :
        # Register an analysis performed on an existing matrix.
        tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type'], 'matrix' : analyse['matrix']}
        tosave['name'] = analyse['name']
        if tosave['matrix'] in self.ordermatrix :
            self.matrix[self.ordermatrix[tosave['matrix']]]['analyses'].append(tosave)
        # [save() call elided in this view]
    def addmultiple(self, analyses) :
        # Register a batch of corpus analyses in one go.
        log.info('add multiple')
        for analyse in analyses :
            tosave = {'uuid' : analyse['uuid'], 'ira': analyse['ira'], 'type' : analyse['type']}
            corpus = analyse['corpus']
            tosave['corpus'] = corpus
            tosave['name'] = analyse['name']
            if corpus in self.corpus :
                if 'analyses' in self.history[self.ordercorpus[corpus]] :
                    self.history[self.ordercorpus[corpus]]['analyses'].append(tosave)
                # ['else:' header elided] first analysis for this corpus:
                    self.history[self.ordercorpus[corpus]]['analyses'] = [tosave]
        # [save() call elided in this view]
    def delete(self, analyse, corpus = False) :
        # Remove a corpus, corpus analysis or matrix (matched by uuid).
        log.info('delete %s' % analyse.get('name', 'noname'))
        # ['if corpus :' branch header elided in this view]
            self.history.pop(self.ordercorpus[analyse['uuid']])
            if analyse['uuid'] in self.openedcorpus :
                del self.openedcorpus[analyse['uuid']]
            log.info('delete corpus : %s' % analyse['uuid'])
        elif analyse['uuid'] in self.analyses :
            # analysis of a corpus: drop it from its corpus entry
            todel = [i for i, ana in enumerate(self.corpus[analyse['corpus']]['analyses']) if ana['uuid'] == analyse['uuid']][0]
            self.history[self.ordercorpus[analyse['corpus']]]['analyses'].pop(todel)
        elif analyse['uuid'] in self.matrixanalyse :
            self.matrix = [mat for mat in self.matrix if mat['uuid'] != analyse['uuid']]
        elif analyse.get('matrix', False) in self.matrixanalyse :
            analyses = self.matrix[self.ordermatrix[analyse['matrix']]]['analyses']
            topop = [i for i, val in enumerate(analyses) if analyse['uuid'] == val['uuid']][0]
            # [analyses.pop(topop) elided in this view]
            self.matrix[self.ordermatrix[analyse['matrix']]]['analyses'] = analyses
        # [save() call elided in this view]
    def addtab(self, analyse) :
        # Remember that this analysis is opened in a notebook tab.
        self.opened[analyse['uuid']] = analyse
    def rmtab(self, analyse) :
        # Forget an analysis whose notebook tab was closed.
        del self.opened[analyse['uuid']]
    def update(self, analyse) :
        # Refresh the stored metadata of an existing entry, dispatching on
        # which identifying key the parametres dict carries.
        if 'matrix_name' in analyse :
            self.matrixanalyse[analyse['uuid']].update(analyse)
        elif 'corpus_name' in analyse :
            self.corpus[analyse['uuid']].update(analyse)
        elif 'corpus' in analyse :
            self.analyses[analyse['uuid']].update(analyse)
        # ['elif matrix' branch header elided in this view] matrix analysis:
            toupdate = [an for an in self.matrixanalyse[analyse['matrix']]['analyses'] if an['uuid'] == analyse['uuid']]
            toupdate[0].update(analyse)
        # [save() call elided in this view]
        # [method header elided in this view -- removes history entries whose
        # .ira file no longer exists on disk]
        corpustodel = [corpus for corpus in self.history if not os.path.exists(corpus['ira'])]
        for corpus in corpustodel :
            print('cleaning :', corpus['corpus_name'])
            self.delete(corpus, corpus = True)
        anatodel = [analyse for corpus in self.history for analyse in corpus.get('analyses', []) if not os.path.exists(analyse.get('ira', '/'))]
        for analyse in anatodel :
            print('cleaning :', analyse['name'])
            # [self.delete(analyse) elided in this view]
        # [method header and counter initialisations elided in this view --
        # walks the whole history and prints aggregate usage statistics]
        for corpus in self.history :
            analysenb += len(corpus.get('analyses', []))
            analyses = corpus.get('analyses', [])
            for analyse in analyses :
                if os.path.exists(analyse['ira']) :
                    ana = DoConf(analyse['ira']).getoptions()
                    # ['time' presence guard elided in this view]
                    time = ana['time'].split()
                    # accumulate analysis durations; stored as "XXh XXm XXs"
                    ha += int(time[0].replace('h','')) * 3600
                    ma += int(time[1].replace('m','')) * 60
                    sa += int(time[2].replace('s',''))
            if os.path.exists(corpus['ira']) :
                param = DoConf(corpus['ira']).getoptions()
                time = param.get('time','0h 0m 0s')
                # [time.split() elided in this view]
                hours += int(time[0].replace('h','')) * 3600
                minutes += int(time[1].replace('m','')) * 60
                secondes += int(time[2].replace('s',''))
                if param.get('originalpath', False) :
                    # count repeated imports of the same original corpus file
                    if param['originalpath'] in corpusnb :
                        corpusnb[param['originalpath']] += 1
                        tokens += int(param['occurrences'])
                    # ['else:' header elided] first time this original is seen:
                        corpusnb[param['originalpath']] = 1
                # [else-branch tracking corpora without an original path elided]
            if corpus['ira'] in todel :
                # [sub-corpus counting elided in this view]
        print('Nbr total de corpus : %s' % len(self.history))
        corpus_nb = len(corpusnb) + len(todel)
        print('Nbr de corpus différents : %s' % corpus_nb)
        lentodel = len(todel)
        print('Nbr de corpus à supprimer : %s' % lentodel)
        print('Nbr de sous corpus : %s' % subnb)
        print("Nbr total d'occurrences : %s" % tokens)
        print('Moyenne occurrences par corpus : %f' % (tokens/corpus_nb))
        print('---------------------')
        print("Nbr total d'analyses : %s" % analysenb)
        print('Temps total indexation : %f h' % ((hours+minutes+secondes) / 3600))
        print('Temps total analyses : %f h' % ((ha+ma+sa) / 3600))
        # [__str__ of the class follows; its 'def' line is elided in this view]
        return str(self.history)
    def __init__(self, configfile=None, diff = None, parametres = None) :
        # Thin ConfigParser wrapper for iramuteq .ini analysis files.
        self.configfile = configfile
        # interpolation disabled so raw '%' characters stored in option
        # values do not trigger ConfigParser interpolation errors
        self.conf = ConfigParser(interpolation=None)
        if configfile is not None :
            configfile = normpath_win32(configfile)
            # NOTE(review): the codecs file handle is never closed explicitly
            self.conf.read_file(codecs.open(configfile, 'r', 'utf8'))
        if parametres is not None :
            self.doparametres(parametres)
    def doparametres(self, parametres) :
        # [body elided in this view]
319 def getsections(self) :
320 return self.conf.sections()
    def getoptions(self, section = None, diff = None):
        # Read one section into a dict, converting digit strings to int,
        # 'True'/'False' to booleans, and '(...)' / '[...]' literals through
        # ast.literal_eval; anything else stays a string.
        # [parametres initialisation and the section-default guard elided]
            section = self.conf.sections()[0]
        for option in self.conf.options(section) :
            if self.conf.get(section, option).isdigit() :
                parametres[option] = int(self.conf.get(section, option))
            elif self.conf.get(section, option) == 'False' :
                parametres[option] = False
            elif self.conf.get(section, option) == 'True' :
                parametres[option] = True
            elif self.conf.get(section, option).startswith('(') and self.conf.get(section, option).endswith(')') :
                parametres[option] = ast.literal_eval(self.conf.get(section, option))
            elif self.conf.get(section, option).startswith('[') and self.conf.get(section, option).endswith(']') :
                parametres[option] = ast.literal_eval(self.conf.get(section, option))
            # ['else:' header elided in this view] plain string value:
                parametres[option] = self.conf.get(section, option)
        if 'type' not in parametres :
            parametres['type'] = section
        # [return parametres elided in this view]
    def makeoptions(self, sections, parametres, outfile = None) :
        # Serialise parametres (one dict per section) both into self.conf
        # and into an ini-formatted text written to *outfile*.
        # [initialisation of the accumulated `txt` string elided in this view]
        for i, section in enumerate(sections) :
            txt += '[%s]\n' % section
            if not self.conf.has_section(section) :
                self.conf.add_section(section)
            for option in parametres[i] :
                if isinstance(parametres[i][option], int) :
                    self.conf.set(section, option, repr(parametres[i][option]))
                    txt += '%s = %i\n' % (option, parametres[i][option])
                elif isinstance(parametres[i][option], str) :
                    self.conf.set(section, option, parametres[i][option])
                    txt += '%s = %s\n' % (option, parametres[i][option])
                elif isinstance(parametres[i][option], wx.Colour) :
                    # colours are stored through their str() representation
                    self.conf.set(section, option, str(parametres[i][option]))
                    txt += '%s = %s\n' % (option, str(parametres[i][option]))
                elif option == 'analyses' :
                    # [skipped on purpose: analyses are stored separately -- body elided]
                # ['else:' header elided] any other python value via repr():
                    self.conf.set(section, option, repr(parametres[i][option]))
                    txt += '%s = %s\n' % (option, repr(parametres[i][option]))
        # [outfile-default guard elided in this view]
            outfile = self.configfile
        outfile = normpath_win32(outfile)
        with open(outfile, 'w', encoding="utf-8") as f :
            # [f.write(txt) elided in this view]
    def totext(self, parametres) :
        # Human-readable 'key \t\t: value' dump of a parametres dict.
        # [initialisation of the `txt` list elided in this view]
        for val in parametres :
            if isinstance(parametres[val], int) :
                txt.append(' \t\t: '.join([val, repr(parametres[val])]))
            elif isinstance(parametres[val], str) :
                txt.append(' \t\t: '.join([val, parametres[val]]))
            elif val in ['listet', 'stars'] :
                # [these keys are skipped -- body and 'else:' header elided]
                txt.append(' \t\t: '.join([val, repr(parametres[val])]))
        return '\n'.join(txt)
def write_tab(tab, fileout) :
    """Write a 2-D table to *fileout* as a semicolon-separated CSV file.

    Non-numeric cells are quoted (csv.QUOTE_NONNUMERIC) so they round-trip
    through R. The original opened the file without ever closing it and
    without newline='' (which produces blank interposed lines on Windows);
    both issues are fixed here.

    :param tab: iterable of rows (iterables of cells)
    :param fileout: destination path
    """
    # newline='' is the documented way to open csv targets on Python 3
    with open(fileout, 'w', newline='') as f:
        csvWriter = csv.writer(f, delimiter=';', quoting=csv.QUOTE_NONNUMERIC)
        csvWriter.writerows(tab)
class BugDialog(wx.Dialog):
    """Modal, stay-on-top dialog used to display error reports and R tracebacks."""

    def __init__(self, *args, **kwds):
        # begin wxGlade: MyDialog.__init__
        kwds["style"] = wx.DEFAULT_DIALOG_STYLE | wx.STAY_ON_TOP
        kwds["size"] = wx.Size(500, 200)
        wx.Dialog.__init__(self, *args, **kwds)
        # callers must pass title= (see BugReport); raises KeyError otherwise
        self.SetTitle(kwds['title'])
        self.text_ctrl_1 = wx.TextCtrl(self, -1, "", style=wx.TE_MULTILINE)
        self.text_ctrl_1.SetBackgroundColour('#DDE8EB')
        self.button_1 = wx.Button(self, wx.ID_OK, "")
        # [event binding elided in this view]
        self.__set_properties()
        # [self.__do_layout() call elided in this view]

    def __set_properties(self):
        # begin wxGlade: MyDialog.__set_properties
        self.SetMinSize(wx.Size(500, 200))
        self.text_ctrl_1.SetMinSize(wx.Size(500, 200))
        # [remaining property lines elided in this view]

    def __do_layout(self):
        # begin wxGlade: MyDialog.__do_layout
        sizer_1 = wx.BoxSizer(wx.VERTICAL)
        sizer_1.Add(self.text_ctrl_1, 1, wx.EXPAND, 0)
        sizer_1.Add(self.button_1, 0, wx.ALIGN_CENTER_HORIZONTAL, 0)
        self.SetSizer(sizer_1)
        # [Layout()/Fit() calls elided in this view]
def CreateIraFile(DictPathOut, clusternb, corpname='corpus_name', section = 'analyse'):
    # Write the '.ira' metadata file describing a finished analysis.
    AnalyseConf = ConfigParser()
    AnalyseConf.read(DictPathOut['ira'])
    AnalyseConf.add_section(section)
    date = datetime.datetime.now().ctime()
    AnalyseConf.set(section, 'date', str(date))
    AnalyseConf.set(section, 'clusternb', clusternb)
    AnalyseConf.set(section, 'corpus_name', corpname)
    # [additional set() calls elided in this view]
    # NOTE(review): handle opened without a context manager; explicit close
    # presumably elided in this view.
    fileout = open(DictPathOut['ira'], 'w')
    AnalyseConf.write(fileout)
def multisort(liste2d, ordre, indices_tri):
    """Sort *liste2d* (a list of rows) in place on the columns listed in
    *indices_tri*, the first index being the primary sort key.

    Replacement for the Python 2 'comp'-based multi-key sort: because
    list.sort() is stable, sorting on the keys from last to first is
    equivalent to a single multi-key sort.

    Fixes in this revision:
    - removed `indices_tri.Tuple(int, ...)` which raised AttributeError
      (lists have no `Tuple` method -- leftover pseudo-annotation);
    - rows are plain sequences, so the key must be operator.itemgetter,
      not attrgetter (which was not even imported).

    :param liste2d: list of indexable rows, sorted in place
    :param ordre: True for descending order, False for ascending
    :param indices_tri: iterable of column indices, primary key first
    :return: the sorted input list, for call-chaining convenience
    """
    for key in reversed(indices_tri):
        liste2d.sort(key=itemgetter(key), reverse=ordre)
    return liste2d
def sortedby(liste2d, direct, *indices):
    """
    sortedby: sort a list of lists (e.g. a table) by one or more indices
    (columns of the table) and return the sorted list.

    ex (sorting on a single column):
    for l = [[2,3],[1,2],[3,1]]:
    sortedby(l, 1, 1) returns [[3, 1], [1, 2], [2, 3]] (by column 1),
    sortedby(l, 1, 0) returns [[1, 2], [2, 3], [3, 1]] (by column 0).

    :param liste2d: list of indexable rows, sorted in place
    :param direct: 2 for descending order, anything else for ascending
    :param indices: column indices, primary key first
    :return: the sorted list (same object as the input)
    """
    # Stable sorts applied from the last key to the first are equivalent to
    # a single multi-key sort; this replaces the old 2to3-converted decorate/
    # sort/undecorate code that was left here commented out.
    for key in reversed(indices):
        liste2d.sort(key=itemgetter(key), reverse=(direct==2))
    return liste2d
def add_type(line, dictlem):
    # Append the grammatical type of the lemma (5th field) to a profile row.
    if line[4] in dictlem:
        line.append(dictlem[line[4]])
    # [else branch (append a placeholder) and return elided in this view]
def treat_line_alceste(i, line) :
    # Normalise one row of an Alceste-style profile: reorder/cast the count
    # and score columns and pretty-print the p-value (column 5).
    if line[0] == '*' or line[0] == '*****' :
        # [marker rows returned mostly untouched -- body elided in this view]
    elif float(line[5].replace(',', '.')) < 0.0001:
        # [very small p-values rendered as '< 0,0001' -- body elided]
    elif float(line[5].replace(',', '.')) > 0.05:
        line[5] = 'NS (%s)' % str(float(line[5].replace(',', '.')))[0:7]
    # ['else:' header elided in this view]
        line[5] = str(float(line[5].replace(',', '.')))[0:7]
    return [i, int(line[0]), int(line[1]), float(line[2]), float(line[3]), line[6], line[4], line[5]]
def ReadProfileAsDico(File, Alceste=False, encoding = sys.getdefaultencoding()):
    # Parse a profile file produced by the R scripts into
    # {cluster_number_as_string: [summary_triple, row, row, ...]}.
    # [dictlem initialisation elided in this view]
    print('lecture des profiles')
    # NOTE(review): file handle not closed via a context manager here
    FileReader = codecs.open(File, 'r', encoding)
    Filecontent = FileReader.readlines()
    # [FileReader.close() and DictProfile initialisation elided in this view]
    #rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace(',', '.').replace('\r','').split(';') for row in Filecontent]
    rows = [row.replace('\n', '').replace("'", '').replace('\"', '').replace('\r','').split(';') for row in Filecontent]
    ClusterNb = rows[0][2]
    # [rows.pop(0) elided in this view]
    clusters = [row[2] for row in rows if row[0] == '**']
    valclusters = [row[1:4] for row in rows if row[0] == '****']
    lp = [i for i, line in enumerate(rows) if line[0] == '****']
    # slice the flat row list into one sub-list of rows per cluster
    prof = [rows[lp[i] + 1:lp[i+1] - 1] for i in range(0, len(lp)-1)] + [rows[lp[-1] + 1:len(rows)]]
    # ['if Alceste:' branch header elided in this view]
        prof = [[add_type(row, dictlem) for row in pr] for pr in prof]
        prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
    # ['else:' header elided] pad each row with an empty type column instead:
        prof = [[line + [''] for line in pr] for pr in prof]
        prof = [[treat_line_alceste(i,line) for i, line in enumerate(pr)] for pr in prof]
    for i, cluster in enumerate(clusters):
        DictProfile[cluster] = [valclusters[i]] + prof[i]
    # [return DictProfile elided in this view]
def GetTxtProfile(dictprofile, cluster_size) :
    """Render every class profile as aligned plain-text columns.

    Defect fixed: the accumulator list was referenced without ever being
    initialised in the block as given (NameError on first append).

    :param dictprofile: maps 1-based class numbers *as strings* to a list
        whose first element is the class summary triple and whose remaining
        elements are 8-field profile rows
    :param cluster_size: cluster_size[i] = (n_uce, total_uce, percent) for
        class i+1
    :return: one text chunk per class, separated by blank lines
    """
    proflist = []
    for classe in range(0, len(dictprofile)) :
        prof = dictprofile[str(classe + 1)]
        clinfo = cluster_size[classe]
        # header line, then one fixed-width row per 8-field profile entry
        proflist.append('\n'.join([' '.join(['classe %i' % (classe + 1), '-', '%s uce sur %s - %s%%' % (clinfo[0], clinfo[1], clinfo[2])]), '\n'.join(['%5s|%5s|%6s|%6s|%8s|%8s|%20s\t%10s' % tuple([str(val) for val in line]) for line in prof if len(line)==8])]))
    return '\n\n'.join(proflist)
def formatExceptionInfo(maxTBlevel=5):
    # Return (exception_name, first_arg_or_placeholder, formatted_traceback)
    # for the exception currently being handled.
    cla, exc, trbk = sys.exc_info()
    # ['try:' header elided in this view]
    excName = cla.__name__
    # [guard around argument access elided in this view]
    excArgs = exc.args[0]
    # [except branch: exceptions raised without arguments]
    excArgs = "<no args>"
    excTb = traceback.format_tb(trbk, maxTBlevel)
    return (excName, excArgs, excTb)
# contributed by IUT students
def decoupercharact(chaine, longueur, longueurOptimale, separateurs = None) :
    """
    Cut *chaine* near *longueurOptimale* characters on the best separator.
    Scanning backwards from the end: a '$' wins outright; otherwise the
    punctuation mark with the highest weight/distance ratio is chosen.
    Returns (found, words_of_the_cut_part, remainder).
    """
    # [separateurs-default guard elided in this view; the weights below are
    # the default table: stronger punctuation cuts are preferred]
    separateurs = [['.', 60.0], ['?', 60.0], ['!', 60.0], ['£$£', 60], [':', 50.0], [';', 40.0], [',', 10.0], [' ', 0.1]]
    trouve = False                 # whether a good separator was found
    iDecoupe = 0                   # index at which to cut
    # work on at most `longueur` + 1 characters
    longueur = min(longueur, len(chaine) - 1)
    chaineTravail = chaine[:longueur + 1]
    meilleur = ['', 0, 0]          # type, weight and position of the best separator
    # a '$' in the working slice forces the cut at its position
    indice = chaineTravail.find('$')
    # [branch on `indice` and the backwards-scan loop header elided in this view]
    caractere = chaineTravail[nbCar]
    distance = abs(longueurOptimale - nbCar) + 1
    meilleureDistance = abs(longueurOptimale - meilleur[2]) + 1
    # is the current character a punctuation mark?
    for s in separateurs:
        if caractere == s[0]:
            # keep it when its weight/distance ratio beats the current best
            if s[1] / distance > float(meilleur[1]) / meilleureDistance:
                # [update of `meilleur` elided in this view]
            # [early-exit cases elided in this view]
    # [loop decrement / end-of-scan handling elided in this view]
    fin = chaine[iDecoupe + 1:]
    retour = chaineTravail[:iDecoupe]
    return len(retour) > 0, retour.split(), fin
    # nothing found: return the whole string split into words
    return False, chaine.split(), ''
# User-facing (French) messages for corpus-format errors; keys are matched
# against the text of Exceptions raised during import (see BugReport()).
exceptions = {'paragrapheOT' : "Un problème de formatage (présence d'un marqueur de paragraphe (-*) en dehors d'un texte) est survenu à la ligne ",
              'EmptyText' : "Texte vide (probablement un problème de formatage du corpus). Le problème est apparu à la ligne ",
              'CorpusEncoding' : "Problème d'encodage.",
              'TextBeforeTextMark' : "Problème de formatage : du texte avant le premier marqueur de texte (****). Le problème est survenu à la ligne ",
              'MissingAnalyse' : 'Aucun fichier à cet emplacement :\n',
              # [remaining entries and the closing brace elided in this view]
def BugReport(parent, error = None):
    # Show the current exception to the user in a BugDialog, translating
    # known corpus-format errors into friendly (French) messages.
    for ch in parent.GetChildren():
        # close any progress dialog left open by the failed treatment
        if "<class 'wx._windows.ProgressDialog'>" == str(type(ch)):
            # [ch.Destroy() elided in this view]
    excName, exc, excTb = formatExceptionInfo()
    if excName == 'Exception' :
        # "expected" errors are raised as plain Exception('key linenb')
        # [logging elided in this view]
        if len(exc.split()) == 2 :
            mss, linenb = exc.split()
            if mss in exceptions :
                txt = exceptions[mss] + linenb
            # [unknown-message fallback elided in this view]
        # [single-token message branch header elided in this view]
            if exc in exceptions :
                txt = exceptions[exc]
            # [fallback elided in this view]
        title = "Information"
    # ['else:' header elided] a real bug -> build a traceback report:
        txt = '\n !== BUG ==! \n'
        txt += '*************************************\n'
        txt += '\n'.join(excTb).replace(' ', ' ')
        txt += excName + '\n'
        # [appending `error` text and setting the title elided in this view]
    dial = BugDialog(parent, **{'title' : title})
    if 'Rerror' in dir(parent) :
        # [append R stderr output; reset parent.Rerror -- elided in this view]
    dial.text_ctrl_1.write(txt)
    dial.CenterOnParent()
    # [dial.ShowModal()/Destroy() elided in this view]
def PlaySound(parent):
    # Play the end-of-treatment sound if enabled in the user preferences.
    if parent.pref.getboolean('iramuteq', 'sound') :
        # ['try:' header elided in this view -- playback is best-effort]
        if "gtk2" in wx.PlatformInfo:
            # wx sound support is unreliable under gtk2: shell out to aplay
            error = Popen(['aplay','-q',os.path.join(parent.AppliPath,'son_fin.wav')])
        # ['else:' header elided in this view]
            sound = wx.adv.Sound(os.path.join(parent.AppliPath, 'son_fin.wav'))
            sound.Play(wx.adv.SOUND_SYNC)
        # [except clause (log and continue) elided in this view]
def ReadDicoAsDico(dicopath):
    """Load a tab-separated dictionary file (UTF-8) as {key: [fields...]}.

    Each line is 'form\tfield1\tfield2...'. Defect fixed: the original
    filter `if line != ''` never matched because readlines() keeps the
    trailing newline, so blank lines produced a bogus {'': []} entry;
    blank/whitespace-only lines are now skipped properly.

    :param dicopath: path to the dictionary file
    :return: dict mapping the first column to the list of remaining columns
    """
    with codecs.open(dicopath, 'r', 'UTF8') as f:
        content = f.readlines()
    lines = [line.rstrip('\n\r').replace('\n', '').replace('"', '').split('\t')
             for line in content if line.strip() != '']
    return dict([[line[0], line[1:]] for line in lines])
def ReadLexique(parent, lang = 'french', filein = None):
    # Load the lemma dictionary for *lang* (or an explicit file) into
    # parent.lexique; the dispatch branches are elided in this view.
    # [branch: no explicit file -> look the language up in parent.DictPath]
        parent.lexique = ReadDicoAsDico(parent.DictPath.get(lang, 'french'))
    # [branch: explicit file given]
        parent.lexique = ReadDicoAsDico(filein)
    # [branch elided in this view]
        parent.lexique = ReadDicoAsDico(filein)
def ReadList(filein, encoding = sys.getdefaultencoding(), sep = ';'):
    # Read a separated-value matrix file; builds a row dict keyed by line
    # number plus the header row (construction/return elided in this view).
    # [initialisation elided in this view]
    with codecs.open(filein, 'r', encoding) as f :
        # [content = f.read() elided in this view]
    content = [line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.').split(sep) for line in content.splitlines()]
    #file = codecs.open(filein, 'r', encoding)
    #content = file.readlines()
    # header row
    first = content.pop(0)
    #first = first.replace('\n', '').replace('\r','').replace('\"', '').split(sep)
    # [per-line conversion loop header elided in this view]
    #line = line.replace('\n', '').replace('\r','').replace('\"', '').replace(',', '.')
    #line = line.split(';')
    # float-convertible cells are rounded to 5 decimal places
    don = float('%.5f' % float(val))
    # [dict construction and return (content_dict, first) elided in this view]
def exec_RCMD(rpath, command) :
    """Install an R package by running `R CMD INSTALL <command>`.

    Defect fixed: the exit status of the call was computed but never
    returned in the block as given; callers can now check it.

    :param rpath: path to the R executable
    :param command: package path/name passed to `R CMD INSTALL`
    :return: subprocess exit status (0 on success)
    """
    log.info('R CMD INSTALL %s' % command)
    # double the backslashes so a Windows path survives later quoting
    rpath = rpath.replace('\\','\\\\')
    error = call(["%s" % rpath, 'CMD', 'INSTALL', "%s" % command])
    return error
def exec_rcode(rpath, rcode, wait = True, graph = False):
    # Run an R script either blocking (call, returns the exit status) or
    # asynchronous (Popen, returns the process handle), with platform
    # quirks: win32 argument quoting and X11 on old macOS for graphics.
    log.info("R Script : %s" % rcode)
    # [needX11 initialisation elided in this view]
    if sys.platform == 'darwin' :
        # macOS < 10.5 needs an X11 display for R graphics
        # ['try:' header elided in this view]
        macversion = platform.mac_ver()[0].split('.')
        if int(macversion[1]) < 5 :
            # [needX11 = True elided in this view]
        # [remaining branches elided in this view]
    rpath = rpath.replace('\\','\\\\')
    env = os.environ.copy()
    if sys.platform == 'darwin' and 'LC_ALL' not in env:
        env['LC_ALL'] = 'en_US.UTF-8'
    # ['if not graph:' branch header elided in this view]
    if sys.platform == 'win32':
        error = call(["%s" % rpath, "--vanilla","--slave","-f", "%s" % rcode])
    # ['else:' header elided in this view]
        error = call([rpath, '--slave', "--vanilla", "--encoding=UTF-8", "-f %s" % rcode], env = env)
    # [return error / 'else: (no wait)' header elided in this view]
    if sys.platform == 'win32':
        pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
    # ['else:' header elided in this view]
        pid = Popen([rpath, '--slave', "--vanilla", "--encoding=UTF-8", "-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8') #PIPE ou STDOUT ?
    # [return pid elided; graph branch below mirrors the same structure
    # with DISPLAY set for X11 when needed]
    if sys.platform == 'win32':
        error = call(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
    elif sys.platform == 'darwin' and needX11:
        os.environ['DISPLAY'] = ':0.0'
        error = call([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], env = env, encoding='UTF-8')
    # ['else:' header elided in this view]
        error = call([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], env = env, encoding='UTF-8')
    # [return error / 'else: (no wait)' header elided in this view]
    if sys.platform == 'win32':
        pid = Popen(["%s" % rpath, '--vanilla','--slave','-f', "%s" % rcode])
    elif sys.platform == 'darwin' and needX11:
        os.environ['DISPLAY'] = ':0.0'
        pid = Popen([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8')
    # ['else:' header elided in this view]
        pid = Popen([rpath, '--vanilla','--slave', "--encoding=UTF-8","-f %s" % rcode], stderr = PIPE, env = env, encoding='UTF-8')
    # [return pid elided in this view]
def check_Rresult(parent, pid) :
    # Inspect the outcome of an R run (Popen handle or call() exit status);
    # surfaces failures to the user and raises/returns accordingly.
    if isinstance(pid, Popen) :
        if pid.returncode != 0 :
            # collect stdout/stderr from the finished process
            error = pid.communicate()
            error = [str(error[0]), error[1]]
            if error[1] is None :
                # [placeholder error text elided in this view]
            parent.Rerror = '\n'.join([str(pid.returncode), '\n'.join(error)])
            # [try/BugReport handling elided in this view]
            raise Exception('\n'.join(['Erreur R', '\n'.join(error[1:])]))
        # [success branch (return True) elided in this view]
    # [non-Popen branch (plain exit status) largely elided in this view]
            raise Exception('Erreur R')
    # [remaining returns elided in this view]
def launchcommand(mycommand):
    # Fire-and-forget launch of an external command
    # [Popen(mycommand) body elided in this view].
def print_liste(filename, liste):
    """Write each row of *liste* to *filename*, one line per row, fields
    joined with ';'.

    Defect fixed: the loop header was missing in the block as given, so the
    loop variable `graph` was undefined (NameError).

    :param filename: destination path
    :param liste: iterable of rows (iterables of strings)
    """
    with open(filename, 'w') as f:
        for graph in liste:
            f.write(';'.join(graph) + '\n')
def read_list_file(filename, encoding = sys.getdefaultencoding()):
    """Read a ';'-separated list file and return its rows.

    Blank (whitespace-only) lines are skipped. Defect fixed: the parsed
    rows were computed but never returned in the block as given.

    :param filename: path of the file to read
    :param encoding: codec used to decode the file
    :return: list of rows, each a list of the ';'-separated fields
    """
    with codecs.open(filename, 'r', encoding) as f:
        content = f.readlines()
    ncontent = [line.replace('\n', '').split(';') for line in content if line.strip() != '']
    return ncontent
def progressbar(self, maxi):
    # Build the standard modal progress dialog used by long treatments.
    ira = wx.GetApp().GetTopWindow()
    # [parent/maxi normalisation elided in this view]
    prog = wx.ProgressDialog("Traitements",
                             "Veuillez patienter...",
                             # [maximum= and parent= arguments elided in this view]
                             style=wx.PD_APP_MODAL | wx.PD_AUTO_HIDE | wx.PD_ELAPSED_TIME | wx.PD_CAN_ABORT
                             # [closing parenthesis elided in this view]
    # NOTE(review): the ABORT button is not honoured by every caller.
    prog.SetSize((400,150))
    #prog.SetIcon(ira._icon)
    # [return prog elided in this view]
def treat_var_mod(variables) :
    # Group 'variable_modality' strings into {variable: [modalities...]}.
    # [var_mod initialisation elided in this view]
    variables = list(set(variables))
    varmod = [variable.split('_') for variable in variables]
    vars = list(set([var[0] for var in varmod if len(var) >=2]))
    # ['for var in vars :' loop header elided in this view]
    mods = ['_'.join(v) for v in varmod if v[0] == var]
    # [var_mod[var] = mods and return elided in this view]
    # for variable in variables :
    #     if '_' in variable :
    #         forme = variable.split('_')
    #         ...
    #         if not var in var_mod :
    #             var_mod[var] = [variable]
    #         ...
    #             if not mod in var_mod[var] :
    #                 var_mod[var].append(variable)
def doconcorde(corpus, uces, mots, uci = False) :
    # Build HTML concordances: for each text segment in *uces*, return the
    # starred metadata line and the segment text with the target word forms
    # highlighted in red.
    # ['if not uci :' branch header elided in this view]
        ucestxt1 = [row for row in corpus.getconcorde(uces)]
    # ['else:' header elided in this view]
        ucestxt1 = [row for row in corpus.getuciconcorde(uces)]
    ucestxt1 = dict(ucestxt1)
    # [ucis_txt / ucestxt list initialisations elided in this view]
    # expand each requested lemma into all of its surface forms
    listmot = [corpus.getlems()[lem].formes for lem in mots]
    listmot = [corpus.getforme(fid).forme for lem in listmot for fid in lem]
    mothtml = ['<font color=red><b>%s</b></font>' % mot for mot in listmot]
    dmots = dict(list(zip(listmot, mothtml)))
    # ['for uce in uces :' loop header elided in this view]
    ucetxt = ucestxt1[uce].split()
    ucetxt = ' '.join([dmots.get(mot, mot) for mot in ucetxt])
    # ['if not uci :' branch header elided in this view]
        uciid = corpus.getucefromid(uce).uci
        ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[corpus.getucefromid(uce).uci].etoiles) + '<a href="%i_%i"> *%i_%i</a></b></p>' % (uciid, uce, uciid, uce))
    # ['else:' header elided in this view]
        ucis_txt.append('<p><b>' + ' '.join(corpus.ucis[uce].etoiles) + '</b></p>')
    ucestxt.append(ucetxt)
    return ucis_txt, ucestxt
def getallstcarac(corpus, analyse) :
    # NOTE(review): `Alceste` and `self` are undefined at module level --
    # this function appears unfinished or dead; confirm before relying on it.
    pathout = PathOut(analyse['ira'])
    profils = ReadProfileAsDico(pathout['PROFILE_OUT'], Alceste, self.encoding)
    # [remainder elided in this view]
def read_chd(filein, fileout):
    # Convert the R clustering output (tab-separated parent/children rows)
    # into the nested {'name':..., 'children':[...]} JSON consumed by the
    # dendrogram display.
    with open(filein, 'r') as f :
        # [content = f.read() elided in this view]
    #content = [line[3:].replace('"',"").replace(' ','') for line in content.splitlines()]
    content = [line.split('\t') for line in content.splitlines()]
    chd = {'name':1, 'children':[]}
    # [mere = {} (node-by-name index) and loop guards elided in this view]
    for i, line in enumerate(content) :
        # [root-node guard elided in this view] attach the two children of
        # the root; sizes come from the following row:
        chd['children'] = [{'name': line[1],'size' : content[i+1][0]}, {'name':line[2], 'size': content[i+1][1]}]
        mere[line[1]] = chd['children'][0]
        mere[line[2]] = chd['children'][1]
        # [elif branch header elided] parent already known:
        if 'children' in mere[line[0]]:
            mere[line[0]]['children'].append({'name': line[1],'size' : content[i+1][0]})
            mere[line[1]] = mere[line[0]]['children'][-1]
            mere[line[0]]['children'].append({'name': line[2],'size' : content[i+1][1]})
            mere[line[2]] = mere[line[0]]['children'][-1]
        # ['else:' header elided] first children of this parent:
            mere[line[0]]['children'] = [{'name': line[1],'size' : content[i+1][0]}, {'name':line[2], 'size': content[i+1][1]}]
            mere[line[1]] = mere[line[0]]['children'][-2]
            mere[line[2]] = mere[line[0]]['children'][-1]
    with open(fileout, 'w') as f :
        f.write(json.dumps(chd))
# Google Translate language codes keyed by English language name
# (used by gettranslation / translateprofile).
translation_languages = {"Afrikaans":"af", "Albanian":"sq", "Amharic":"am", "Arabic":"ar", "Armenian":"hy", "Azeerbaijani":"az", "Basque":"eu", "Belarusian":"be", "Bengali":"bn", "Bosnian":"bs", "Bulgarian":"bg", "Catalan":"ca", "Cebuano":"ceb", "Chichewa":"ny", "Chinese (Simplified)":"zh-CN", "Chinese (Traditional)":"zh-TW", "Corsican":"co", "Croatian":"hr", "Czech":"cs", "Danish":"da", "Dutch":"nl", "English":"en", "Esperanto":"eo", "Estonian":"et", "Filipino":"tl", "Finnish":"fi", "French":"fr", "Frisian":"fy", "Galician":"gl", "Georgian":"ka", "German":"de", "Greek":"el", "Gujarati":"gu", "Haitian Creole":"ht", "Hausa":"ha", "Hawaiian":"haw", "Hebrew":"iw", "Hindi":"hi", "Hmong":"hmn ", "Hungarian":"hu", "Icelandic":"is", "Igbo":"ig", "Indonesian":"id", "Irish":"ga", "Italian":"it", "Japanese":"ja", "Javanese":"jw", "Kannada":"kn", "Kazakh":"kk", "Khmer":"km", "Korean":"ko", "Kurdish":"ku", "Kyrgyz":"ky", "Lao":"lo", "Latin":"la", "Latvian":"lv", "Lithuanian":"lt", "Luxembourgish":"lb", "Macedonian":"mk", "Malagasy":"mg", "Malay":"ms", "Malayalam":"ml", "Maltese":"mt", "Maori":"mi", "Marathi":"mr", "Mongolian":"mn", "Burmese":"my", "Nepali":"ne", "Norwegian":"no", "Pashto":"ps", "Persian":"fa", "Polish":"pl", "Portuguese":"pt", "Punjabi":"ma", "Romanian":"ro", "Russian":"ru", "Samoan":"sm", "Scots Gaelic":"gd", "Serbian":"sr", "Sesotho":"st", "Shona":"sn", "Sindhi":"sd", "Sinhala":"si", "Slovak":"sk", "Slovenian":"sl", "Somali":"so", "Spanish":"es", "Sundanese":"su", "Swahili":"sw", "Swedish":"sv", "Tajik":"tg", "Tamil":"ta", "Telugu":"te", "Thai":"th", "Turkish":"tr", "Ukrainian":"uk", "Urdu":"ur", "Uzbek":"uz", "Vietnamese":"vi", "Welsh":"cy", "Xhosa":"xh", "Yiddish":"yi", "Yoruba":"yo", "Zulu":"zu", }
def gettranslation(words, lf, lt) :
    # Translate *words* from language code *lf* to *lt* through the public
    # Google Translate endpoint; returns sanitised single-token strings
    # (spaces/dashes turned into underscores so they stay single forms).
    import urllib.request, urllib.error, urllib.parse
    # [json import elided in this view]
    # [browser User-Agent string partly elided in this view]
    agent = {'User-Agent':
             ".NET CLR 3.0.04506.30\
    base_link = "https://translate.googleapis.com/translate_a/single?client=gtx&sl=%s&tl=%s&dt=t&q=%s"
    # [word-count logging elided in this view]
    totrans = urllib.parse.quote('\n'.join(words))
    link = base_link % (lf, lt, totrans)
    request = urllib.request.Request(link, headers=agent)
    raw_data = urllib.request.urlopen(request).read()
    data = json.loads(raw_data)
    return [line[0].replace("'", '_').replace(' | ', '|').replace(' ', '_').replace('-','_').replace('\n','') for line in data[0]]
def makenprof(prof, trans, deb=0) :
    # Rebuild profile rows, substituting the translated form (column 6)
    # of each row starting at offset *deb*.
    # [nprof initialisation and deb==0 guard elided in this view]
    nprof.append(prof[0])
    for i, val in enumerate(trans) :
        line = prof[deb+i+1][:]
        # [line[6] = val; nprof.append(line); return nprof -- elided in this view]
def treatempty(val) :
    # Replace empty/whitespace-only strings by a placeholder
    # [replacement value and returns elided in this view].
    if val.strip() == '' :
def translateprofile(corpus, dictprofile, lf='it', lt='fr', maxword = 50) :
    # Translate the first *maxword* active and supplementary forms of every
    # class profile from *lf* to *lt*; returns (new_profile, {trans: orig})
    # in the elided tail. Duplicate translations are disambiguated with '+'.
    # [nprof / lems initialisation elided in this view]
    for i in range(len(dictprofile)) :
        prof = dictprofile[repr(i+1)]
        # ['try:' header elided] locate the end of the active-forms section:
        lenact = prof.index(['*****', '*', '*', '*', '*', '*', '', ''])
        # [fallback branch:]
        lenact = prof.index(['*', '*', '*', '*', '*', '*', '', ''])
        # [supplementary-section length computation, partly elided:]
        lensup += prof.index(['*', '*', '*', '*', '*', '*', '', ''])
        lensup = lensup - lenact
        # [except fallback:]
        lensup += len(prof) - lenact
        # cap the number of words sent to the translation service
        if lenact > maxword :
            # [nlenact = maxword / else nlenact = lenact -- elided in this view]
        actori = [line[6] for line in prof[1:nlenact]]
        act = [val.replace('_', ' ') for val in actori]
        act = gettranslation(act, lf, lt)
        for j, val in enumerate(actori) :
            if act[j] not in lems :
                # [lems[act[j]] = val elided in this view]
            # ['else:' elided] disambiguate duplicated translations:
            while act[j] in lems :
                act[j] = act[j] + "+"
            # [lems[act[j]] = val elided in this view]
        nprof[repr(i+1)] = makenprof(prof, act)
        # supplementary forms, same scheme as above
        if lensup > maxword :
            # [nlensup = maxword / else nlensup = lensup -- elided in this view]
        supori = [line[6] for line in prof[(1+lenact):(lenact+nlensup)]]
        sup = [val.replace('_', ' ') for val in supori]
        sup = [treatempty(val) for val in sup]
        sup = gettranslation(sup, lf, lt)
        for j, val in enumerate(supori) :
            if sup[j] not in lems :
                # [lems[sup[j]] = val elided in this view]
            while sup[j] in lems :
                sup[j] = sup[j] + "+"
            # [lems[sup[j]] = val elided in this view]
        nprof[repr(i+1)].append(['*****', '*', '*', '*', '*', '*', '', ''])
        nprof[repr(i+1)] += makenprof(prof, sup, deb=lenact)
        # ['try:' header elided] append the etoile section untouched:
        lenet = prof.index(['*', '*', '*', '*', '*', '*', '', ''])
        nprof[repr(i+1)].append(['*', '*', '*', '*', '*', '*', '', ''])
        nprof[repr(i+1)] += prof[(lenet+1):]
        # [except: profile has no etoile section -- elided in this view]
    # [return nprof, lems elided in this view]
def write_translation_profile(prof, lems, language, dictpathout) :
    # Persist a translated profile: a CSV of the profile, a CSV glossary of
    # the translations, and an updated translations.txt index file.
    if os.path.exists(dictpathout['translations.txt']) :
        with codecs.open(dictpathout['translations.txt'], 'r', 'utf8') as f :
            translist = f.read()
        translist = [line.split('\t') for line in translist.splitlines()]
    # [else: translist starts empty -- elided in this view]
    # [toprint initialisation elided in this view]
    toprint.append(['','','','','',''])
    toprint.append(['***', 'nb classes', repr(len(prof)), '***', '', ''])
    for i in range(len(prof)) :
        toprint.append(['**', 'classe', repr(i+1), '**', '', ''])
        toprint.append(['****'] + prof[repr(i+1)][0] + ['****'])
        rest = [[repr(line[1]), repr(line[2]), repr(line[3]), repr(line[4]), line[6], line[7].replace('< 0,0001', '0.00009').replace('NS (','').replace(')','')] for line in prof[repr(i+1)][1:]]
        # NOTE(review): the loop variable `i` below shadows the outer `i`.
        for i, line in enumerate(prof[repr(i+1)][1:]) :
            # [marker-row test elided in this view]
            rest[i] = ['*', '*', '*', '*', '*', '*']
        elif line[0] == '*****' :
            rest[i] = ['*****','*','*', '*', '*', '*']
        # [toprint += rest -- elided in this view]
    with open(dictpathout['translation_profile_%s.csv' % language], 'w') as f :
        f.write('\n'.join([';'.join(line) for line in toprint]))
    with open(dictpathout['translation_words_%s.csv' % language], 'w') as f :
        f.write('\n'.join(['\t'.join([val, lems[val]]) for val in lems]))
    # register the new files in the index if not already listed
    if 'translation_profile_%s.csv' % language not in [val[0] for val in translist] :
        translist.append(['translation_profile_%s.csv' % language, 'translation_words_%s.csv' % language])
        with open(dictpathout['translations.txt'], 'w') as f :
            f.write('\n'.join(['\t'.join(line) for line in translist]))
def makesentidict(infile, language) :
    # Build NRC-style emotion word lists from a lexicon file and dump them
    # as one tab-separated line per sentiment to /tmp/tgenemo.csv.
    with codecs.open(infile,'r', 'utf8') as f :
        # [content = f.read() elided in this view]
    content = [line.split('\t') for line in content.splitlines()]
    titles = content.pop(0)
    senti = ['Positive', 'Negative', 'Anger', 'Anticipation', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise', 'Trust']
    # [sentid = {} and 'for sent in senti :' loop header elided in this view]
    sentid[sent] = titles.index(sent)
    # locate the column holding the words
    # NOTE(review): hard-coded '(fr)' column despite the `language`
    # parameter -- confirm intent.
    frtitle = [val for val in titles if '(fr)' in val]
    frid = titles.index(frtitle[0])
    sentidict = [[line[frid].lower(), [line[sentid[sent]] for sent in senti]] for line in content]
    pos = ['positive'] + [line[0] for line in sentidict if line[1][0] == '1']
    neg = ['negative'] + [line[0] for line in sentidict if line[1][1] == '1']
    anger = ['anger'] + [line[0] for line in sentidict if line[1][2] == '1']
    anticipation = ['anticipation'] + [line[0] for line in sentidict if line[1][3] == '1']
    disgust = ['disgust'] + [line[0] for line in sentidict if line[1][4] == '1']
    fear = ['fear'] + [line[0] for line in sentidict if line[1][5] == '1']
    joy = ['joy'] + [line[0] for line in sentidict if line[1][6] == '1']
    sadness = ['sadness'] + [line[0] for line in sentidict if line[1][7] == '1']
    surprise = ['surprise'] + [line[0] for line in sentidict if line[1][8] == '1']
    trust = ['trust'] + [line[0] for line in sentidict if line[1][9] == '1']
    # NOTE(review): hard-coded, non-portable output path.
    with open('/tmp/tgenemo.csv', 'w') as f :
        for val in [pos, neg, anger, anticipation, disgust, fear, joy, sadness, surprise, trust] :
            f.write('\t'.join(val) + '\n')
def countsentfromprof(prof, encoding, sentidict) :
    # Count sentiment hits in a profile CSV [tail of the function elided].
    with codecs.open(prof, 'r', encoding) as f :
        # [content = f.read() elided in this view]
    content = [line.split(';') for line in content.splitlines()]
    # [header-row removal elided in this view]
    content = [[line[0], [int(val) for val in line[1:]]] for line in content]
    # [intermediate step elided in this view]
    content = dict(content)
    # [counting against sentidict and return elided in this view]
1082 def iratolexico(infile, outfile, encoding) :
1083 with codecs.open(infile, 'r', encoding) as f :
1085 if line.startswith('**** ') :