X-Git-Url: http://www.iramuteq.org/git?a=blobdiff_plain;f=import_txm.py;h=37ffa2e71185a53aeecc002291ee45be18b3fbb6;hb=eaa044d1147e26b82942ce56d5965c83fdddf069;hp=0dfd1ebb82e7d61f50f8b3f4cbcef97f3921317d;hpb=10d67a5cd48583c060b6a0e77e87c41f80671027;p=iramuteq diff --git a/import_txm.py b/import_txm.py index 0dfd1eb..37ffa2e 100644 --- a/import_txm.py +++ b/import_txm.py @@ -1,10 +1,12 @@ -#!/bin/env python # -*- coding: utf-8 -*- #Author: Pierre Ratinaud -#Copyright (c) 2013 Pierre Ratinaud +#Copyright (c) 2008-2020 Pierre Ratinaud +#modification pour python 3 : Laurent Mérat, 6x7 - mai 2020 #License: GNU/GPL - +#------------------------------------ +# import des modules python +#------------------------------------ import os import xml.sax import glob @@ -37,24 +39,24 @@ class TXMParser(xml.sax.ContentHandler) : def characters(self, content) : if self.name == 'txm:form' : - if content not in [u'', u' ', u'\n', '\r'] : + if content not in ['', ' ', '\n', '\r'] : self.sent.append(content.rstrip('\n\r')) #self.fileout.write(content.encode('utf8')) def text2stars(self, attrs) : - stars = ['_'.join(val).replace(' ', '_').replace("'", '_').replace('/','').replace('.','').replace(';', '').replace(':', '').replace(u'·','') for val in attrs.items()] - stars = [''.join([u'*', val]) for val in stars] - stars = u'**** ' + ' '.join(stars) - self.fileout.write(stars.encode(self.encodage_out)) + stars = ['_'.join(val).replace(' ', '_').replace("'", '_').replace('/','').replace('.','').replace(';', '').replace(':', '').replace('·','') for val in list(attrs.items())] + stars = [''.join(['*', val]) for val in stars] + stars = '**** ' + ' '.join(stars) + self.fileout.write(stars) self.fileout.write('\n') def printsent(self) : if self.sent != [] : sent = ' ' + ' '.join(self.sent) - for val in [u' .', u' ,', u' ;', u' :', u' ?', u' !', u' -'] : + for val in [' .', ' ,', ' ;', ' :', ' ?', ' !', ' -'] : sent = sent.replace(val, val.strip()) sent = sent.replace("' ", "'") - self.fileout.write(sent.encode(self.encodage_out)) + self.fileout.write(sent) self.sent = [] def TXM2IRA(pathin, fileout, encodage_in, encodage_out) : @@ -67,4 +69,4 @@ def TXM2IRA(pathin, fileout, encodage_in, encodage_out) : for f in files : parser.parse(open(f, 'r')) fout.write('\n\n') - return None \ No newline at end of file + return None