22 """classes that hold units of comma-separated values (.csv) files (csvunit)
23 or entire files (csvfile) for use with localisation
24 """
25
26 import csv
27 import codecs
28 try:
29 import cStringIO as StringIO
30 except:
31 import StringIO
32
33 from translate.misc import sparse
34 from translate.storage import base


class SimpleDictReader:
    def __init__(self, fileobj, fieldnames):
        self.fieldnames = fieldnames
        self.contents = fileobj.read()
        self.parser = sparse.SimpleParser(defaulttokenlist=[",", "\n"], whitespacechars="\r")
        self.parser.stringescaping = 0
        self.parser.quotechars = '"'
        self.tokens = self.parser.tokenize(self.contents)
        self.tokenpos = 0

    def __iter__(self):
        return self

    def getvalue(self, value):
        """returns a value, evaluating quoted strings as necessary"""
        if (value.startswith("'") and value.endswith("'")) or (value.startswith('"') and value.endswith('"')):
            return sparse.stringeval(value)
        else:
            return value

    def next(self):
        lentokens = len(self.tokens)
        # skip any leading newlines; stop iterating once the tokens are exhausted
        while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n":
            self.tokenpos += 1
        if self.tokenpos >= lentokens:
            raise StopIteration()
        # collect the tokens that make up this row
        thistokens = []
        while self.tokenpos < lentokens and self.tokens[self.tokenpos] != "\n":
            thistokens.append(self.tokens[self.tokenpos])
            self.tokenpos += 1
        while self.tokenpos < lentokens and self.tokens[self.tokenpos] == "\n":
            self.tokenpos += 1
        fields = []
        currentfield = ''
        fieldparts = 0
        for token in thistokens:
            if token == ',':
                # a field made of a single token may be a quoted string that needs evaluating
                if fieldparts == 1:
                    currentfield = self.getvalue(currentfield)
                fields.append(currentfield)
                currentfield = ''
                fieldparts = 0
            else:
                currentfield += token
                fieldparts += 1
        # handle the last field on the row (there is no trailing comma)
        if fieldparts:
            if fieldparts == 1:
                currentfield = self.getvalue(currentfield)
            fields.append(currentfield)
        values = {}
        for fieldnum in range(len(self.fieldnames)):
            if fieldnum >= len(fields):
                values[self.fieldnames[fieldnum]] = ""
            else:
                values[self.fieldnames[fieldnum]] = fields[fieldnum]
        return values


class DefaultDialect(csv.excel):
    skipinitialspace = True
    quoting = csv.QUOTE_NONNUMERIC
    escapechar = '\\'

csv.register_dialect('default', DefaultDialect)


def from_unicode(text, encoding='utf-8'):
    """encode a unicode value to a byte string in the given encoding (byte strings pass through unchanged)"""
    if encoding == 'auto':
        encoding = 'utf-8'
    if isinstance(text, unicode):
        return text.encode(encoding)
    return text


def to_unicode(text, encoding='utf-8'):
    """decode a byte string to unicode in the given encoding (unicode values pass through unchanged)"""
    if encoding == 'auto':
        encoding = 'utf-8'
    if isinstance(text, str):
        return text.decode(encoding)
    return text


class csvunit(base.TranslationUnit):
    spreadsheetescapes = [("+", "\\+"), ("-", "\\-"), ("=", "\\="), ("'", "\\'")]

    def __init__(self, source=None):
        super(csvunit, self).__init__(source)
        self.location = ""
        self.source = source or ""
        self.target = ""
        self.id = ""
        self.fuzzy = 'False'
        self.developer_comments = ""
        self.translator_comments = ""
        self.context = ""

    def getid(self):
        if self.id:
            return self.id

        result = self.source
        context = self.context
        if context:
            result = u"%s\04%s" % (context, result)

        return result

    def getlocations(self):
        return [self.location]

    def addlocation(self, location):
        self.location = location

    def getcontext(self):
        return self.context

    def setcontext(self, value):
        self.context = value

    def getnotes(self, origin=None):
        if origin is None:
            result = self.translator_comments
            if self.developer_comments:
                if result:
                    result += '\n' + self.developer_comments
                else:
                    result = self.developer_comments
            return result
        elif origin == "translator":
            return self.translator_comments
        elif origin in ('programmer', 'developer', 'source code'):
            return self.developer_comments
        else:
            raise ValueError("Comment type not valid")

    def addnote(self, text, origin=None, position="append"):
        if origin in ('programmer', 'developer', 'source code'):
            if position == 'append' and self.developer_comments:
                self.developer_comments += '\n' + text
            elif position == 'prepend' and self.developer_comments:
                self.developer_comments = text + '\n' + self.developer_comments
            else:
                self.developer_comments = text
        else:
            if position == 'append' and self.translator_comments:
                self.translator_comments += '\n' + text
            elif position == 'prepend' and self.translator_comments:
                self.translator_comments = self.translator_comments + '\n' + text
            else:
                self.translator_comments = text

    def removenotes(self):
        self.translator_comments = u''

    def isfuzzy(self):
        if self.fuzzy.lower() in ('1', 'x', 'true', 'yes', 'fuzzy'):
            return True
        return False

    def markfuzzy(self, value=True):
        if value:
            self.fuzzy = 'True'
        else:
            self.fuzzy = 'False'
207 """see if unit might be a header"""
208 some_value = False
209 for key, value in self.todict().iteritems():
210 if value:
211 some_value = True
212 if key.lower() != 'fuzzy' and value and key.lower() != value.lower():
213 return False
214 return some_value
215
224
233

    def fromdict(self, cedict, encoding='utf-8'):
        for key, value in cedict.iteritems():
            rkey = fieldname_map.get(key, key)
            if value is None or key == EXTRA_KEY:
                continue
            value = to_unicode(value, encoding)
            if rkey == "id":
                self.id = value
            elif rkey == "source":
                self.source = value
            elif rkey == "target":
                self.target = value
            elif rkey == "location":
                self.location = value
            elif rkey == "fuzzy":
                self.fuzzy = value
            elif rkey == "context":
                self.context = value
            elif rkey == "translator_comments":
                self.translator_comments = value
            elif rkey == "developer_comments":
                self.developer_comments = value

    def todict(self, encoding='utf-8'):
        source = self.source
        target = self.target
        output = {
            'location': from_unicode(self.location, encoding),
            'source': from_unicode(source, encoding),
            'target': from_unicode(target, encoding),
            'id': from_unicode(self.id, encoding),
            'fuzzy': str(self.fuzzy),
            'context': from_unicode(self.context, encoding),
            'translator_comments': from_unicode(self.translator_comments, encoding),
            'developer_comments': from_unicode(self.developer_comments, encoding),
        }

        return output


canonical_field_names = ('location', 'source', 'target', 'id', 'fuzzy',
                         'context', 'translator_comments', 'developer_comments')

fieldname_map = {
    'original': 'source',
    'untranslated': 'source',
    'translated': 'target',
    'translation': 'target',
    'identified': 'id',
    'key': 'id',
    'label': 'id',
    'translator comments': 'translator_comments',
    'notes': 'translator_comments',
    'developer comments': 'developer_comments',
    'state': 'fuzzy',
}

EXTRA_KEY = '__CSVL10N__EXTRA__'


def try_dialects(inputfile, fieldnames, dialect):
    """build a DictReader, falling back to the 'default' and then 'excel' dialects if the given one fails"""
    try:
        inputfile.seek(0)
        reader = csv.DictReader(inputfile, fieldnames=fieldnames, dialect=dialect, restkey=EXTRA_KEY)
    except csv.Error:
        try:
            inputfile.seek(0)
            reader = csv.DictReader(inputfile, fieldnames=fieldnames, dialect='default', restkey=EXTRA_KEY)
        except csv.Error:
            inputfile.seek(0)
            reader = csv.DictReader(inputfile, fieldnames=fieldnames, dialect='excel', restkey=EXTRA_KEY)
    return reader


def valid_fieldnames(fieldnames):
    """check if the fieldnames are valid, i.e. at least one column maps to 'source'"""
    for fieldname in fieldnames:
        if fieldname in canonical_field_names and fieldname == 'source':
            return True
        elif fieldname in fieldname_map and fieldname_map[fieldname] == 'source':
            return True
    return False


def detect_header(sample, dialect, fieldnames):
    """test whether the first row is a header; return its fieldnames if they are valid,
    otherwise the default fieldnames trimmed to the width of the first row (minimum of three columns)"""
    inputfile = StringIO.StringIO(sample)
    try:
        reader = csv.reader(inputfile, dialect)
    except csv.Error:
        try:
            inputfile.seek(0)
            reader = csv.reader(inputfile, 'default')
        except csv.Error:
            inputfile.seek(0)
            reader = csv.reader(inputfile, 'excel')

    header = reader.next()
    columncount = max(len(header), 3)
    if valid_fieldnames(header):
        return header
    return fieldnames[:columncount]
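
# A worked illustration (assumed sample data, not a test from the original module):
#
#     >>> detect_header('key,original,translation\r\nfoo,Hello,Hallo\r\n', 'excel',
#     ...               ['location', 'source', 'target'])
#     ['key', 'original', 'translation']
#
# The header row is accepted because fieldname_map maps 'original' to 'source';
# if no column maps to 'source', the default fieldnames (trimmed to the width of
# the first row, minimum three) are returned instead.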


class csvfile(base.TranslationStore):
    """This class represents a .csv file with various lines.
    The default format contains three columns: location, source, target"""
    UnitClass = csvunit
    Name = _("Comma Separated Value")
    Mimetypes = ['text/comma-separated-values', 'text/csv']
    Extensions = ["csv"]

    def __init__(self, inputfile=None, fieldnames=None, encoding="auto"):
        base.TranslationStore.__init__(self, unitclass=self.UnitClass)
        self.units = []
        self.encoding = encoding or 'utf-8'
        if not fieldnames:
            self.fieldnames = ['location', 'source', 'target', 'id', 'fuzzy',
                               'context', 'translator_comments', 'developer_comments']
        else:
            if isinstance(fieldnames, basestring):
                fieldnames = [fieldname.strip() for fieldname in fieldnames.split(",")]
            self.fieldnames = fieldnames
        self.filename = getattr(inputfile, 'name', '')
        self.dialect = 'default'
        if inputfile is not None:
            csvsrc = inputfile.read()
            inputfile.close()
            self.parse(csvsrc)

    def parse(self, csvsrc):
        text, encoding = self.detect_encoding(csvsrc, default_encodings=['utf-8', 'utf-16'])
        # re-encode anything that is not already utf-8 so the csv module sees consistent bytes
        if encoding and encoding.lower() != 'utf-8':
            csvsrc = text.encode('utf-8').lstrip(codecs.BOM_UTF8)
        self.encoding = encoding or 'utf-8'

        sniffer = csv.Sniffer()
        # sniff the dialect from a small sample of the input
        sample = csvsrc[:1024]
        if isinstance(sample, unicode):
            sample = sample.encode('utf-8')

        try:
            self.dialect = sniffer.sniff(sample)
            if not self.dialect.escapechar:
                self.dialect.escapechar = '\\'
            if self.dialect.quoting == csv.QUOTE_MINIMAL:
                # the sniffer reports QUOTE_MINIMAL even for fully quoted files, so quote everything to be safe
                self.dialect.quoting = csv.QUOTE_ALL
                self.dialect.doublequote = True
        except csv.Error:
            self.dialect = 'default'

        try:
            fieldnames = detect_header(sample, self.dialect, self.fieldnames)
            self.fieldnames = fieldnames
        except csv.Error:
            pass

        inputfile = StringIO.StringIO(csvsrc)
        reader = try_dialects(inputfile, self.fieldnames, self.dialect)

        # skip a recognised header row instead of storing it as a unit
        first_row = True
        for row in reader:
            newce = self.UnitClass()
            newce.fromdict(row)
            if not first_row or not newce.match_header():
                self.addunit(newce)
            first_row = False

    def serialize(self):
        outputfile = StringIO.StringIO()
        writer = csv.DictWriter(outputfile, self.fieldnames, extrasaction='ignore', dialect=self.dialect)
        # write a header row built from the fieldnames themselves
        hdict = dict(zip(self.fieldnames, self.fieldnames))
        writer.writerow(hdict)
        for ce in self.units:
            cedict = ce.todict()
            writer.writerow(cedict)
        return outputfile.getvalue()
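

if __name__ == "__main__":
    # A minimal usage sketch, not part of the module proper: parse a small
    # in-memory CSV (made-up sample data) and round-trip it back to text.
    sample_csv = ("location,source,target\r\n"
                  "greeting.title,Hello,Hallo\r\n"
                  "farewell.title,Goodbye,Totsiens\r\n")
    store = csvfile(StringIO.StringIO(sample_csv))
    for unit in store.units:
        print "%s -> %s" % (unit.source, unit.target)
    print store.serialize()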