Subject: Re: [RFC] moving translations off sheets ?
Date: Sat, 9 Jun 2001 18:16:44 +0200
Le sam, jun 09, 2001, à 06:13:32 +0200, Cyrille Chepelov a écrit:
> Mmmmhhh... Looks like it does what my code does (but in a more generic way).
> My little script (attached here) is more crude: it just snarfs all strings
forgot it...
--
Grumpf.
# This program extracts sheet translations and puts them in a C file for
# further extraction by xgettext(1).
#
# Copyright (C) 2001, Cyrille Chepelov <chepelov@calixo.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
from xml.sax import saxexts
from xml.sax import saxlib
import sys,string,pickle
# Note: a list of strings is not hashable, so wherever a dictionary should be
# keyed by a list of strings, the pickled form of that list (pickle.dumps) is
# used as the key instead.
def list_merge(a,b):
    """Append to list ``a``, in place, each element of ``b`` it lacks.

    ``a`` is modified; nothing is returned.  Elements already present in
    ``a`` (compared with ``==``) are skipped, so duplicates inside ``b``
    are added only once.
    """
    for x in b:
        # Test the element itself.  Testing the whole sequence ``b`` for
        # membership (as was done before) is practically never true, which
        # let duplicates through.
        if x not in a:
            a.append(x)
def message_merge(msgdict,message,translations):
    """Merge the translations of one message into ``msgdict``, in place.

    msgdict      -- dictionary keyed by pickled message, whose values are
                    dictionaries mapping a language code to a translation.
    message      -- the untranslated message; it is pickled to build the key.
    translations -- dictionary mapping a language code to a translation.

    An entry for ``message`` is created even when ``translations`` is empty.
    A translation already recorded for a language is never replaced: if the
    new one differs, a warning is written to standard output and the new
    translation is ignored.
    """
    msgkey = pickle.dumps(message)
    known = msgdict.setdefault(msgkey, {})
    for k,v in translations.items():
        if k not in known:
            known[k] = v
        elif known[k] != v:
            sys.stdout.write(
"""W: translation collision on message '%s' for language %s
W: (previous translation '%s')
W: (new(ignored) translation '%s')
""" % (message,k,known[k],v))
def places_merge(placedict,message,places):
    """Record, in place, the ``places`` where ``message`` was found.

    placedict -- dictionary keyed by pickled message, whose values are lists
                 of places.
    message   -- the message; it is pickled to build the key.
    places    -- sequence of places to add; those already listed for the
                 message are handled by list_merge().
    """
    msgkey = pickle.dumps(message)
    # setdefault creates the (empty) list on first sight of the message.
    list_merge(placedict.setdefault(msgkey, []),places)
class SheetTranslationSlurper(saxlib.DocumentHandler):
    """SAX document handler that collects translatable sheet strings.

    While a document is parsed, the text of ``name`` and ``description``
    elements without an ``xml:lang`` attribute is recorded as a message id,
    and the text of those carrying ``xml:lang`` as a translation for that
    language.  When a ``sheet`` or ``object`` element closes, the collected
    strings are merged into ``self.messages`` (see message_merge()); the
    places where message ids were seen are merged into ``self.places``
    (see places_merge()).

    All per-document state is (re)created in setDocumentLocator(), so the
    handler only works with parsers that supply a locator.
    """

    def __init__(self):
        self.elemstk = []   # names of the currently open elements
        self.langstk = []   # one dictionary per open sheet/object
        self.namestk = []   # one printable label per open sheet/object

    def setDocumentLocator(self,locator):
        """Remember ``locator`` and reset all per-document state."""
        self.locator = locator
        saxlib.DocumentHandler.setDocumentLocator(self,locator)
        self.desctransstk = [] # of dictionaries[lang->translated message]
        self.descidstk = [] # of strings (untranslated messages)
        self.sheetnameidstk = [] # (should have at most one element)
        self.sheetnametransstk = [] # (ditto)
        self.inlangstk = [] # of language codes
        self.issheetnameid = 0
        self.issheetnametrans = '' # or the language code
        self.isdescid = 0
        self.isdesctrans = '' # or the language code
        self.placestk = [] # of (tagged filename,line number) tuples
        self.datastack = [] # of strings
        self.messages = {} # of dictionaries[msgid(pickled list of strings)->dictionary[langid(string)->translated(list of strings)]]
        self.places = {} # of lists[msgid(pickled list of strings)->list of (tagged filename,line number)]
        self.warning("locator is set now")

    def warning(self,message):
        """Write ``message`` to standard output, prefixed with the current
        document position taken from the locator."""
        sys.stdout.write("W:%s:L%d:C%d: %s\n" % (self.locator.getSystemId(),
                                                 self.locator.getLineNumber(),
                                                 self.locator.getColumnNumber(),
                                                 message))

    def resetstate(self):
        """Clear the flags telling what the current element's text is."""
        self.issheetnameid = 0
        self.issheetnametrans = ''
        self.isdescid = 0
        self.isdesctrans = ''

    def startElement(self,name,attrs):
        """Push the bookkeeping for element ``name`` and set the flag
        describing what its character data will be."""
        self.resetstate()
        #print "start of ",name,attrs,attrs.map
        attmap = attrs.map
        self.elemstk.append(name)
        if name in ("sheet", "object"):
            self.langstk.append({})
            if 'name' in attmap:
                label = 'Object "%s"' % attmap['name']
            else:
                label = 'Sheet "%s"' % self.locator.getSystemId()
            if name == "sheet":
                # Pushed under the very condition endElement() pops them
                # on, so the two can never get out of step.
                self.sheetnametransstk.append({})
                self.sheetnameidstk.append("")
            self.namestk.append(label)
            self.desctransstk.append({})
            self.descidstk.append("")
        elif name == "name":
            if "xml:lang" in attmap:
                self.issheetnametrans = attmap["xml:lang"]
            else:
                self.placestk.append( ("N"+self.locator.getSystemId(),
                                       self.locator.getLineNumber()) )
                self.issheetnameid = 1
        elif name == "description":
            if "xml:lang" in attmap:
                self.isdesctrans = attmap["xml:lang"]
            else:
                self.placestk.append( ("D"+self.locator.getSystemId(),
                                       self.locator.getLineNumber()) )
                self.isdescid = 1
        # Every element gets its own text accumulator; endElement() pops it.
        self.datastack.append("")
        #print self.placestk

    def characters(self,ch, start, length):
        """Append the given slice of ``ch`` to the current element's text."""
        s = ch[start:start+length]
        self.datastack[-1] = self.datastack[-1] + s

    def _record_place(self,msgid):
        """Pop the pending place and file it under message id ``msgid``."""
        place = self.placestk.pop()
        # The key is a one-element list, like the keys of self.messages,
        # and the place is wrapped in a list so that the whole
        # (filename,line) tuple is stored rather than its two members.
        places_merge(self.places,[msgid],[place])

    def _store_translation(self,transdict,lang,text):
        """Store ``text`` as the ``lang`` translation in ``transdict``,
        warning first when that language already has one."""
        if lang in transdict:
            self.warning("possible translation collision for lang='%s'" % lang)
        transdict[lang] = [text]

    def endElement(self,name):
        """Consume the text gathered for element ``name``.

        Raises Exception when ``name`` does not match the innermost open
        element.
        """
        #print "end of ",name,
        #print self.placestk,
        popped = self.elemstk.pop()
        if popped != name:
            raise Exception("stack error somewhere...")
        text = self.datastack.pop().strip()
        if self.issheetnameid:
            self.sheetnameidstk[-1] = self.sheetnameidstk[-1] + text
            self._record_place(self.sheetnameidstk[-1])
        elif self.isdescid:
            self.descidstk[-1] = self.descidstk[-1] + text
            self._record_place(self.descidstk[-1])
        elif self.issheetnametrans:
            self._store_translation(self.sheetnametransstk[-1],
                                    self.issheetnametrans,text)
        elif self.isdesctrans:
            self._store_translation(self.desctransstk[-1],
                                    self.isdesctrans,text)
        elif name in ("sheet", "object"):
            self.langstk.pop()
            self.namestk.pop()
            desctrans = self.desctransstk.pop()
            descid = self.descidstk.pop()
            message_merge(self.messages,[descid],desctrans)
            if name == "sheet":
                sheetnameid = self.sheetnameidstk.pop()
                sheetnametrans = self.sheetnametransstk.pop()
                message_merge(self.messages,[sheetnameid],sheetnametrans)
        self.resetstate()
# Load parser and driver
# One parser (p) and one handler (sts) are created at module level and
# shared by every call to extract_sheet_messages().
p=saxexts.make_parser()
#p=saxexts.XMLValParserFactory.make_parser()
sts=SheetTranslationSlurper()
p.setDocumentHandler(sts)
#p.setEntityResolver(BasicEntityResolver())
def extract_sheet_messages(name):
    """Parse the sheet file ``name`` with the module-level parser.

    Returns the handler's ``(messages, places)`` dictionaries on success.
    When parsing raises IOError or saxlib.SAXException, an "E: ..." line is
    written to standard error and ``(None, None)`` is returned instead.
    """
    try:
        p.parse(name)
    except IOError as e:
        sys.stderr.write("E: %s: %s\n" % (name,str(e)))
    except saxlib.SAXException as e:
        sys.stderr.write("E: %s\n" % str(e))
    else:
        return sts.messages,sts.places
    return None,None
## def load_potfile(potfilename):
## messages = {} # dictionary[mesgid(list of strings)] of dictionary[langcode(string)] of list of strings
## comments = {} # dictionary[mesgid(list of strings)] of list of strings (without #)
## dot = {} # dictionary[mesgid(list of strings)] of list of strings (without #)
## places = {} # dictionary[mesgid(list of strings)] of list of (filename(string),linenum(integer))
## attributes = {} # dictionary[mesgid(list of strings)] of list of strings (without #)
## return messages,comments,dot,places,attributes
## def output_potfile(outfilename,lang,messages,comments,dot,places,attributes):
## if not outfilename: out = sys.stdout
## else: out = open(outfilename,"w")
## for key,msgstrs in messages.items():
## msgid = pickle.loads(key)
def output_pseudo_pofile(outfilename,lang,messages):
    """Write a PO-like listing of the ``lang`` translations in ``messages``.

    outfilename -- file to (over)write; standard output when empty.
    lang        -- language code whose translations are listed.
    messages    -- dictionary keyed by pickled message (a list of strings),
                   whose values map a language code to a list of strings.

    Messages without a translation for ``lang`` are skipped.  Each entry is
    a ``msgid`` line, a ``msgstr`` line and an empty line; every string of
    a list goes on its own line between double quotes.  The strings are
    written as they are, without escaping.
    """
    def quoted_lines(strings):
        return "\n".join(['"%s"' % s for s in strings])

    if outfilename:
        out = open(outfilename,"w")
    else:
        out = sys.stdout
    try:
        for k,v in messages.items():
            if lang in v:
                msg = pickle.loads(k)
                out.write('msgid %s\n' % quoted_lines(msg))
                out.write('msgstr %s\n' % quoted_lines(v[lang]))
                # warning: this will probably be an UTF-8 stream. Pipe it
                # through iconv(1) as needed.
                out.write("\n")
    finally:
        # Close what was opened here; never close the caller's stdout.
        if out is not sys.stdout:
            out.close()
def make_c_escapes(s):
    """Return ``s`` escaped for use inside a C string literal.

    Double quotes and backslashes are backslash-escaped; newline, tab and
    carriage return use their usual C escapes; any other character whose
    code is below 32 becomes a three-digit octal escape.  Everything else
    is copied unchanged.
    """
    named = {'"': '\\"', '\\': '\\\\', '\n': '\\n', '\t': '\\t', '\r': '\\r'}
    pieces = []
    for c in s:
        if c in named:
            pieces.append(named[c])
        elif ord(c) < 32:
            # Octal, not "\x..": a C hex escape has no length limit and
            # would absorb any hex digits following it, whereas an octal
            # escape stops after three digits.
            pieces.append("\\%03o" % ord(c))
        else:
            pieces.append(c)
    return "".join(pieces)
def output_c_file(outfilename,messages):
    """Write a C source file listing every message of ``messages``.

    outfilename -- file to (over)write; standard output when empty.
    messages    -- dictionary keyed by pickled message (a list of strings).

    Each message becomes one ``_N("...")`` entry of a string array, its
    strings escaped with make_c_escapes().  The generated header comment
    states that the file exists for xgettext and is not compiled.
    """
    # NOTE(review): gettext's customary marker macro is N_(), not _N();
    # confirm which keyword xgettext is run with before changing it.
    c_msgs = []
    for key in messages.keys():
        parts = [make_c_escapes(part) for part in pickle.loads(key)]
        # The strings of a multi-part message are emitted as adjacent string
        # literals, one per line; a raw line break inside a single literal
        # would not be valid C.
        c_msgs.append('_N("%s")' % '"\n"'.join(parts))
    c_file = """/* AUTOMATICALLY GENERATED FILE. DO NOT EDIT ! */
/* This file has been automatically generated from the sheet files. It's here
so that xgettext can extract these strings and merge them into dia.pot.
It will not actually be compiled. */
#include <intl.h>
static const char *all_sheet_strings[] = {
%s};
""" % ",\n".join(c_msgs)
    if outfilename:
        out = open(outfilename,"w")
    else:
        out = sys.stdout
    try:
        out.write(c_file)
    finally:
        # Close what was opened here; never close the caller's stdout.
        if out is not sys.stdout:
            out.close()
def merge_messages(messages,newmessages):
    """Merge every entry of ``newmessages`` into ``messages``, in place.

    Both are dictionaries keyed by pickled message.  Each key is unpickled
    and handed, with its translations, to message_merge().  An empty or
    ``None`` ``newmessages`` (what extract_sheet_messages() returns for a
    sheet it could not parse) leaves ``messages`` untouched.
    """
    if not newmessages:
        return
    for k,v in newmessages.items():
        msg = pickle.loads(k)
        message_merge(messages,msg,v)
def merge_places(places,newplaces):
    """Merge every entry of ``newplaces`` into ``places``, in place.

    Both are dictionaries keyed by pickled message.  Each key is unpickled
    and handed, with its list of places, to places_merge().  An empty or
    ``None`` ``newplaces`` (what extract_sheet_messages() returns for a
    sheet it could not parse) leaves ``places`` untouched.
    """
    if not newplaces:
        return
    for k,v in newplaces.items():
        place = pickle.loads(k)
        places_merge(places,place,v)
# Command-line driver: extract the strings of every sheet named on the
# command line, merge them, and write the results.
if len(sys.argv)<3:
    sys.stdout.write("Usage: %s <dia.pot> <sheet.sheet>\n" % sys.argv[0])
    sys.stdout.write("\n")
    sys.stdout.write(" <dia.pot>: file name of the POT file to merge into\n")
    sys.stdout.write(" <sheet.sheet>: file name of the sheet to merge\n")
    sys.exit(1)
# The POT file name is accepted but not used yet (load_potfile is still
# commented out).
potfilename = sys.argv[1]
fnames = sys.argv[2:]
import pprint
#messages,comments,dot,places,attributes = load_potfile(potfilename)
messages = {}
places = {}
for name in fnames:
    sheet_messages,sheet_places = extract_sheet_messages(name)
    if sheet_messages is None:
        # The sheet could not be read or parsed; the error has already been
        # reported on standard error, so carry on with the other sheets.
        continue
    merge_messages(messages,sheet_messages)
    merge_places(places,sheet_places)
#pprint.pprint(messages)
#pprint.pprint(places)
lang="fr"
outfilename = "foo.c"
#output_potfile(outfilename,lang,messages,comments,dot,places,attributes)
# NOTE(review): both calls below write to the same file, so the C file
# replaces the pseudo-PO listing written just before it.
output_pseudo_pofile(outfilename,lang,messages)
output_c_file(outfilename,messages)