From 9bde3d55d2131f1a33234a43c0de8b200ddb8f9a Mon Sep 17 00:00:00 2001 From: Pierre Date: Tue, 8 Jan 2013 14:47:31 +0100 Subject: [PATCH] ... --- OptionAlceste.py | 12 ++++---- PrintRScript.py | 10 +++++-- Rscripts/Rgraph.R | 6 ++-- Rscripts/afc_graph.R | 30 +++++++++----------- analysetxt.py | 4 +-- dialog.py | 7 +++++ functions.py | 7 +++-- layout.py | 19 ++++++------- openanalyse.py | 14 +++++++++- parse_factiva_mail.py | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++ textsimi.py | 7 +++-- tree.py | 8 ++++-- 12 files changed, 151 insertions(+), 50 deletions(-) create mode 100644 parse_factiva_mail.py diff --git a/OptionAlceste.py b/OptionAlceste.py index f515d7a..6465e29 100755 --- a/OptionAlceste.py +++ b/OptionAlceste.py @@ -168,8 +168,8 @@ class OptionPam(wx.Dialog): self.label_1 = wx.StaticText(self, -1, u"Lemmatisation") self.radio_1 = wx.RadioBox(self, -1, u"", choices=['oui', 'non'], majorDimension=0, style=wx.RA_SPECIFY_ROWS) - self.label_exp = wx.StaticText(self, -1, u"Utiliser le dict. des expressions") - self.radio_exp = wx.RadioBox(self, -1, u"", choices=['oui', 'non'], majorDimension=0, style=wx.RA_SPECIFY_ROWS) + #self.label_exp = wx.StaticText(self, -1, u"Utiliser le dict. 
des expressions") + #self.radio_exp = wx.RadioBox(self, -1, u"", choices=['oui', 'non'], majorDimension=0, style=wx.RA_SPECIFY_ROWS) txt = u"""Methode de construction de la matrice des distances""" self.label_12 = wx.StaticText(self, -1, txt) @@ -212,10 +212,10 @@ de la matrice des distances""" else: self.radio_1.SetSelection(1) expressions = self.pamconf.getboolean('pam', 'expressions') - if expressions : - self.radio_exp.SetSelection(0) - else : - self.radio_exp.SetSelection(1) + #if expressions : + # self.radio_exp.SetSelection(0) + #else : + # self.radio_exp.SetSelection(1) self.choice_1.SetSelection(self.distance.index(self.pamconf.get('pam', 'method'))) if self.pamconf.get('pam', 'cluster_type') == u'pam' : self.radio_box_3.SetSelection(0) diff --git a/PrintRScript.py b/PrintRScript.py index e93501a..18b3879 100644 --- a/PrintRScript.py +++ b/PrintRScript.py @@ -173,12 +173,12 @@ def RchdTxt(DicoPath, RscriptPath, mincl, classif_mode, nbt = 9, libsvdc = False """ % DicoPath['listeuce2'] txt += """ -# rm(data1) + rm(data1) """ if classif_mode == 0: txt += """ -# rm(data2) + rm(data2) """ txt += """ chd.result <- Rchdtxt("%s",mincl=%i,classif_mode=%i, nbt = nbt) @@ -378,6 +378,10 @@ write.csv2(gbcluster,file="%s") xmax <- max(afc$rowcoord[,1], na.rm = TRUE) + (0.1 * max(afc$rowcoord[,1], na.rm = TRUE)) ymin <- min(afc$rowcoord[,2], na.rm = TRUE) + (0.1 * min(afc$rowcoord[,2], na.rm = TRUE)) ymax <- max(afc$rowcoord[,2], na.rm = TRUE) + (0.1 * max(afc$rowcoord[,2], na.rm = TRUE)) + print(xmin) + print(xmax) + print(ymin) + print(ymax) """ % taillecar txt += """ PlotAfc2dCoul(afc, as.data.frame(chistabletot), "%s", what='coord', deb=1, fin=(debsup-1), xlab = xlab, ylab = ylab, xmin=xmin, xmax=xmax, ymin = ymin, ymax=ymax) @@ -554,7 +558,7 @@ def barplot(table, rownames, colnames, rgraph, tmpgraph, intxt = False) : if (valmin >=0) { valmin <- -2 } else { - valmin <- valmin -2 + valmin <- valmin - 2 } di[tominf] <- valmin } diff --git a/Rscripts/Rgraph.R 
b/Rscripts/Rgraph.R index 2ea1c38..6abd55e 100644 --- a/Rscripts/Rgraph.R +++ b/Rscripts/Rgraph.R @@ -43,7 +43,7 @@ PlotDendroCut <- function(chd,filename,reso,clusternb) { # dev.off() #} -PlotAfc2dCoul<- function(afc,chisqrtable,filename, what='coord',col=FALSE, axetoplot=c(1,2), deb=0,fin=0, width=900, height=900, quality=100, reso=200, parcex=PARCEX, xlab = NULL, ylab = NULL, xmin=NULL, xmax=NULL, ymin=NULL, ymax=NUL) { +PlotAfc2dCoul<- function(afc,chisqrtable,filename, what='coord',col=FALSE, axetoplot=c(1,2), deb=0,fin=0, width=900, height=900, quality=100, reso=200, parcex=PARCEX, xlab = NULL, ylab = NULL, xmin=NULL, xmax=NULL, ymin=NULL, ymax=NULL) { if (col) { if (what == 'coord') { rowcoord <- as.matrix(afc$colcoord) @@ -429,10 +429,10 @@ create_afc_table <- function(x) { make_afc_graph <- function(toplot, classes, clnb, xlab, ylab, cex.txt = NULL, leg = FALSE, cmd = FALSE, black = FALSE, xminmax=NULL, yminmax=NULL) { if (is.null(xminmax)) { - xminmax <- c(min(toplot[,1], na.rm = TRUE) + (0.1 * min(toplot[,1], na.rm = TRUE)), max(toplot[,1], na.rm = TRUE) + (0.1 * max(toplot[,1], na.rm = TRUE))) + xminmax <- c(min(toplot[,1], na.rm = TRUE) + ((max(cex.txt)/10) * min(toplot[,1], na.rm = TRUE)), max(toplot[,1], na.rm = TRUE) + ((max(cex.txt)/10) * max(toplot[,1], na.rm = TRUE))) } if (is.null(yminmax)) { - yminmax <- c(min(toplot[,2], na.rm = TRUE) + (0.1 * min(toplot[,2], na.rm = TRUE)), max(toplot[,2], na.rm = TRUE) + (0.1 * max(toplot[,2], na.rm = TRUE))) + yminmax <- c(min(toplot[,2], na.rm = TRUE) + ((max(cex.txt)/10) * min(toplot[,2], na.rm = TRUE)), max(toplot[,2], na.rm = TRUE) + ((max(cex.txt)/10) * max(toplot[,2], na.rm = TRUE))) } rain <- rainbow(clnb) compt <- 1 diff --git a/Rscripts/afc_graph.R b/Rscripts/afc_graph.R index defdab1..017b782 100644 --- a/Rscripts/afc_graph.R +++ b/Rscripts/afc_graph.R @@ -31,6 +31,10 @@ tchi <- %s tchi.min <- %i tchi.max <- %i dirout <- '%s' +#xmin <- xmin +#xmax <- xmax +#ymin <- ymin +#ymax <- ymax xlab <- 
paste('facteur ', x, ' -') ylab <- paste('facteur ', y, ' -') @@ -58,10 +62,10 @@ if ( qui == 3 ) { } classes <- c(1:clnb) maxchi <- 1 - cex.par <- NULL + cex.par <- rep(taillecar/10, nrow(table.in)) } else { if ( what == 0 ) table.in <- afc$rowcoord - if ( what == 1 ) table.in <- afc$rowcrl*2 + if ( what == 1 ) table.in <- afc$rowcrl rownames(table.in) <- afc$rownames tablechi <- chistabletot rn.keep <- c() @@ -107,19 +111,6 @@ if ( qui == 3 ) { } } -# if (over) { -# rn <- rownames(table.in) -# rownames(table.in) <- 1:nrow(table.in) -# table.in <- unique(table.in) -# rn.keep <- as.numeric(rownames(table.in)) -# rownames(table.in) <- rn[rn.keep] -# tablechi <- tablechi[rn.keep,] -# if (qui==0) { -# cex.par <- cex.par[rn.keep] -# } else { -# cex.par <- NULL -# } -# } if (do.select.nb) { if (select.nb > nrow(table.in)) select.nb <- nrow(table.in) row.keep <- select_point_nb(tablechi, select.nb) @@ -139,7 +130,12 @@ if ( qui == 3 ) { } classes <- apply(tablechi, 1, which.max) maxchi <- apply(tablechi, 1, max) - + infp <- which(is.infinite(maxchi) & maxchi > 0) + if (length(infp)) { + maxchi[infp] <- NA + valmax <- max(maxchi, na.rm = TRUE) + maxchi[infp] <- valmax + 2 + } if (cex.txt) { #row.keep <- append(row.keep, rn.keep) #row.keep <- unique(row.keep) @@ -149,7 +145,7 @@ if ( qui == 3 ) { cex.par <- maxchi cex.par <- norm.vec(cex.par, tchi.min/10, tchi.max/10) } else { - cex.par <- NULL + cex.par <- rep(taillecar/10, nrow(table.in)) } } diff --git a/analysetxt.py b/analysetxt.py index e3e1244..cd3ac77 100644 --- a/analysetxt.py +++ b/analysetxt.py @@ -150,7 +150,7 @@ class Alceste(AnalyseText) : elif self.parametres['classif_mode'] == 2 : self.corpus.make_and_write_sparse_matrix_from_uci(self.actives, self.pathout['TableUc1'], self.pathout['listeuce1']) Rscript = self.printRscript() - self.doR(Rscript) + self.doR(Rscript, dlg = self.dlg, message = 'CHD...') #self.lc = make_ucecl_from_R(self.pathout['uce']) #self.lc0 = self.lc.pop(0) 
self.corpus.make_ucecl_from_R(self.pathout['uce']) @@ -161,7 +161,7 @@ class Alceste(AnalyseText) : self.clnb = len(self.corpus.lc) self.parametres['clnb'] = self.clnb Rscript = self.printRscript2() - self.doR(Rscript) + self.doR(Rscript, dlg = self.dlg, message = 'profils et A.F.C. ...') self.time = time() - self.t1 minutes, seconds = divmod(self.time, 60) hours, minutes = divmod(minutes, 60) diff --git a/dialog.py b/dialog.py index 2e19745..71b0e6f 100755 --- a/dialog.py +++ b/dialog.py @@ -576,6 +576,8 @@ class PrefGraph(wx.Dialog): else : choix=[u'2D' ,u'3D'] self.choicetype = wx.Choice(self, -1, (100,50), choices=choix) + self.label_format = wx.StaticText(self, -1, u"Format de l'image") + self.choix_format = wx.Choice(self, -1, (100,50), choices = ['png', 'svg']) self.label_1 = wx.StaticText(self, -1, u'Largeur') self.spin1 = wx.SpinCtrl(self, -1, '',size = (100,30), min=100, max=5000) self.label_2 = wx.StaticText(self, -1, u'Hauteur') @@ -721,6 +723,11 @@ au chi2 d'association de la forme""" fsizer.Add(wx.StaticLine(self, -1), 0, wx.EXPAND, 0) fsizer.Add(wx.StaticLine(self, -1), 0, wx.EXPAND, 0) + fsizer.Add(self.label_format, 0, wx.ALL | wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 5) + fsizer.Add(self.choix_format, 0, wx.ALL | wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 5) + fsizer.Add(wx.StaticLine(self, -1), 0, wx.EXPAND, 0) + fsizer.Add(wx.StaticLine(self, -1), 0, wx.EXPAND, 0) + fsizer.Add(self.label_what, 0, wx.ALL | wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 5) fsizer.Add(self.choice1, 0, wx.ALL | wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 5) fsizer.Add(wx.StaticLine(self, -1), 0, wx.EXPAND, 0) diff --git a/functions.py b/functions.py index 54ead63..730d135 100644 --- a/functions.py +++ b/functions.py @@ -64,7 +64,7 @@ class History : tosave['corpus'] = analyse['corpus'] tosave['name'] = analyse['name'] acorpus_uuid = analyse['corpus'] - if acorpus_uuid in self.ordercorpus : + if acorpus_uuid in self.corpus : if 'analyses' in 
self.history[self.ordercorpus[acorpus_uuid]] : self.history[self.ordercorpus[acorpus_uuid]]['analyses'].append(tosave) else : @@ -511,6 +511,7 @@ def check_Rresult(parent, pid) : if isinstance(pid, Popen) : if pid.returncode != 0 : error = pid.communicate() + print error error = [str(error[0]), error[1]] if error[1] is None : error[1] = 'None' @@ -521,7 +522,7 @@ def check_Rresult(parent, pid) : #except : # BugReport(parent) else : - return None + return True else : if pid != 0 : #try : @@ -530,7 +531,7 @@ def check_Rresult(parent, pid) : #except : # BugReport(parent) else : - return None + return True def print_liste(filename,liste): with open(filename,'w') as f : diff --git a/layout.py b/layout.py index 9611dbb..aed4de2 100644 --- a/layout.py +++ b/layout.py @@ -89,6 +89,7 @@ class GraphPanelAfc(wx.Panel): 'facteur' : [1,2,3], 'alpha' : 10, 'clnb' : clnb, + 'svg' : 0, } self.__set_properties() @@ -155,6 +156,10 @@ class GraphPanelAfc(wx.Panel): dial.CenterOnParent() val = dial.ShowModal() if val == wx.ID_OK : + if dial.choix_format.GetSelection() == 0 : + svg = 0 + else : + svg = 1 self.param = {'typegraph' : dial.choicetype.GetSelection(), 'width' : dial.spin1.GetValue(), 'height' : dial.spin2.GetValue(), @@ -177,7 +182,8 @@ class GraphPanelAfc(wx.Panel): 'facteur' : [dial.spin_f1.GetValue(),dial.spin_f2.GetValue(), dial.spin_f3.GetValue()], 'clnb' : self.clnb, 'film' : str(dial.film.GetValue()).upper(), - 'alpha' : dial.slider_sphere.GetValue() + 'alpha' : dial.slider_sphere.GetValue(), + 'svg' : svg } self.nb.parent = self.ira self.DictPathOut = self.Dict @@ -190,12 +196,6 @@ class GraphPanelAfc(wx.Panel): afc <- afcf afc_table <- afcf_table chistabletot <- specfp - infp <- which(is.infinite(chistabletot) & chistabletot > 0) - infm <- which(is.infinite(chistabletot) & chistabletot < 0) - chistabletot[infp] <- 0 - chistabletot[infm] <- 0 - chistabletot[infp] <- max(chistabletot) + 1 - chistabletot[infm] <- min(chistabletot) - 1 """ elif self.itempath == 
'liste_graph_afct' : txt +=""" @@ -587,9 +587,8 @@ def PrintRapport(self, corpus, parametres, txt = True): temps d'analyse : %s ########################### """ % parametres['time'] - file = open(self.pathout['pre_rapport'], 'w') - file.write(txt) - file.close() + with open(self.pathout['pre_rapport'], 'w') as f : + f.write(txt) class dolexlayout : def __init__(self, ira, corpus, parametres): diff --git a/openanalyse.py b/openanalyse.py index 0c0fff7..1c5a554 100644 --- a/openanalyse.py +++ b/openanalyse.py @@ -12,7 +12,7 @@ from tableau import Tableau import os import shelve #from ConfigParser import * -#from tabsimi import DoSimi +from tabsimi import DoSimi from functions import BugReport, DoConf import logging @@ -33,12 +33,22 @@ class OpenAnalyse(): if self.conf['type'] == 'corpus' : corpus = self.opencorpus() elif self.conf['corpus'] in self.parent.history.corpus : + print 'corpus in history.corpus' + if self.conf['uuid'] in self.parent.history.analyses : + intree = True + else : + intree = False corpus = self.openanalyse() if self.conf.get('lem',1) : corpus.make_lems(True) else : corpus.make_lems(False) self.doopen(corpus) + if not intree : + self.parent.tree.AddAnalyse(self.conf) + else : + print 'passe apr la' + print self.parent.tree.GiveFocus(uuid = self.conf['uuid'], bold = True) else : corpus = None self.parent.history.addtab(self.conf) @@ -83,6 +93,7 @@ class OpenAnalyse(): if os.path.exists(self.parent.history.history[self.parent.history.ordercorpus[self.conf['corpus']]]['ira']) : corpus = Corpus(self, parametres = DoConf(self.parent.history.history[self.parent.history.ordercorpus[self.conf['corpus']]]['ira']).getoptions('corpus'), read = self.parent.history.history[self.parent.history.ordercorpus[self.conf['corpus']]]['ira']) self.parent.history.openedcorpus[self.conf['corpus']] = corpus + self.parent.history.add(self.conf) return corpus def doopen(self, corpus) : @@ -104,3 +115,4 @@ class OpenAnalyse(): elif self.conf['type'] == 'wordcloud' : 
self.parent.ShowMenu(_("Text analysis")) WordCloudLayout(self.parent, corpus, self.conf) +
diff --git a/parse_factiva_mail.py b/parse_factiva_mail.py
new file mode 100644
index 0000000..50e4510
--- /dev/null
+++ b/parse_factiva_mail.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#Author: Pierre Ratinaud
+#Copyright (c) 2012 Pierre Ratinaud
+#License: GNU/GPL
+
+import os
+import sys
+import codecs
+
+
+#txtdir = 'dev/factiva_txt' #directory holding the texts
+#txtdir = 'corpus/jeunesdebanlieues'
+#fileout = 'dev/factiva_txt_out.txt'
+#encodage_in = 'utf8'
+#encodage_out = 'utf8'
+
+
+def parsetxtmail(txt):
+    """
+    Parse the text of a Factiva mail export into a list of ucis.
+
+    Each uci is a two-item list [variables, text]: variables starts with
+    '****' and receives the '*source_', '*ma_' and '*annee_' tags read from
+    the 'SN ' and 'PD ' lines; text is the article body.
+    """
+    no = ['NS','RE','IPD','CO','IN'] # tags that mark the end of the text
+    lines = txt.splitlines()[1:] # the first line is useless (slicing is safe on empty input)
+    lines = lines[:max(len(lines) - 10, 0)] # so are the last ten; never wrap around on short input
+    keepline = False
+    ucis = []
+    for line in lines : # for each line of the text...
+        if line.startswith('---------------------------------------------------------------') : # a separator line...
+            ucis.append([['****'],'']) # ...starts a new uci
+            keepline = False
+        elif not ucis :
+            continue # nothing to attach to before the first separator
+        elif line.startswith('SN ') : #source
+            source = line[4:]
+            for char in (' ', '\'', u'´', u'’', '-') :
+                source = source.replace(char, '')
+            ucis[-1][0].append('*source_' + source.lower())
+        elif line.startswith('PD ') : #date
+            date = line[4:].split(' ')
+            ucis[-1][0].append(u'*ma_' + date[1] + date[2])
+            ucis[-1][0].append(u'*annee_' + date[2])
+        elif line in no or line.startswith('RF ') : #end of text
+            keepline = False
+        elif line in ['LP', 'TD'] : #start of text
+            keepline = True
+        if keepline and line not in ['LP', 'TD'] :
+            ucis[-1][1] = '\n'.join([ucis[-1][1],line])
+    return ucis
+
+
+def print_ucis(ucis, ofile, encodage) :
+    """
+    Write the ucis whose text is not empty to ofile, encoded with encodage.
+    """
+    ucis = [uci for uci in ucis if uci[1].strip() != '']
+    if not ucis :
+        return
+    toprint = '\n'.join(['\n'.join([' '.join(uci[0]),uci[1]]) for uci in ucis])
+    # ofile receives one write per input file: end with a newline so that the
+    # next '****' line is not glued to the last line of this text
+    ofile.write((toprint + '\n').encode(encodage))
+
+class ParseFactivaMail :
+    """
+    Parse every file of txtdir and write the resulting ucis to fileout.
+    """
+    def __init__(self, txtdir, fileout, encodage_in, encodage_out) :
+        names = sorted(os.listdir(txtdir)) # listed before fileout is created; sorted for a reproducible order
+        with open(fileout,'w') as outf : # output file
+            for name in names : # for each input file...
+                path = os.path.join(txtdir, name)
+                if not os.path.isfile(path) : # a sub-directory cannot be read as a text
+                    continue
+                with codecs.open(path, 'r', encodage_in) as infile :
+                    content = infile.read()
+                print_ucis(parsetxtmail(content), outf, encodage_out)
+
+#for dat in ['2001','2002','2003','2004', '2005','2006','2007','2008','2009','2010','2011'] :
+#    path = os.path.join(txtdir,dat)
+#    outfile = os.path.join(txtdir, 'corpus_' + dat + '.txt')
+#    doparse(path, outfile)
+
+
+if __name__ == '__main__' :
+    # usage: parse_factiva_mail.py txtdir fileout [encodage_in [encodage_out]]
+    args = sys.argv[1:5]
+    if len(args) < 2 :
+        sys.exit('usage: %s txtdir fileout [encodage_in [encodage_out]]' % sys.argv[0])
+    args += ['utf8'] * (4 - len(args)) # both encodings default to utf8
+    ParseFactivaMail(*args)
+    print 'fini'
diff --git a/textsimi.py b/textsimi.py index dc75f6e..918ee44 100644 --- a/textsimi.py +++ b/textsimi.py @@ -20,7 +20,7 @@ from copy import copy import logging -logger = logging.getLogger('iramuteq.textsimi') +log = logging.getLogger('iramuteq.textsimi') class SimiTxt(AnalyseText): def doanalyse(self) : @@ -48,7 +48,8 @@ class SimiTxt(AnalyseText): self.makefiles() script = PrintSimiScript(self) script.make_script() - if not self.doR(script.scriptout) : + if not self.doR(script.scriptout, dlg = self.dlg, message = 'R...') : + log.info('Problem') return False if self.parametres['type_graph'] == 1: if os.path.exists(self.pathout['liste_graph']): @@ -157,7 +158,7 @@ class SimiFromCluster(SimiTxt) : self.makefiles() script = PrintSimiScript(self) script.make_script() - if self.doR(script.scriptout) : + if not self.doR(script.scriptout, dlg = self.dlg, message = 'R ...') : return False if self.parametres['type_graph'] == 1: if os.path.exists(self.pathout['liste_graph']): diff --git a/tree.py b/tree.py index 0009136..0d1aa3f 100644 --- a/tree.py +++ b/tree.py @@ -244,17 +244,21 @@ class LeftTree(CT.CustomTreeCtrl): self.CloseItem(child, uuid) child, cookie = self.GetNextChild(itemParent, cookie) - def GiveFocus(self, itemParent = None, uuid = None) : + def GiveFocus(self, itemParent = None, uuid = None, bold = False) : if itemParent is None : itemParent = self.root child, cookie = self.GetFirstChild(itemParent) + print 
child, cookie while child : pydata = self.GetPyData(child) if pydata['uuid'] == uuid : self.SelectItem(child) - break + if bold : + self.SetItemBold(child, True) + return 'kool' self.GiveFocus(child, uuid) child, cookie = self.GetNextChild(itemParent, cookie) + return 'pas kool' def OnRightDown(self, event): -- 2.7.4