#!ruby
# -*- mode: ruby; coding: utf-8 -*-
# Last updated: <2016/10/23 03:56:59 +0900>
#
# StylePix Language file check.
#
# Reads every file in `filelist` as UTF-16LE (a leading BOM is stripped),
# collects the `ID "text"` entries found in each, and writes a table with one
# row per ID and one column per file in `pickuplist`.
#
# output __tmp.csv (UTF-16LE + BOM); fields are joined with a TAB character.

output_file = "__tmp.csv"

# StylePix Language files
filelist = [
  'English.txt',
  'Japanese.txt',
  'Jp_m256_20161023.txt',
  'Arabic.txt',
  'ChineseSimplified.txt',
  'ChineseTraditional.txt',
  'French.txt',
  'German.txt',
  'Italian.txt',
  'Welsh.txt',
  'korean.txt', # please rename
]

# pickup language files (columns of the output table; each must also be
# listed in filelist, because its parsed entries are looked up by file name)
pickuplist = [
  'English.txt',
  'Japanese.txt',
  'Jp_m256_20161023.txt',
]

# UTF-16LE string
# The lines read from the files are UTF-16LE, so the patterns and the
# separator are encoded to UTF-16LE as well.
re_comment = Regexp.new('^\/\/'.encode("UTF-16LE"))
re_blankline = Regexp.new('^\s+$'.encode("UTF-16LE"))
re_word_tab_word = Regexp.new('^(\S+)\s+\"(.+)\"$'.encode("UTF-16LE"))

nullstr = "".encode("UTF-16LE")
sep = "\t".encode("UTF-16LE")

# File read
data = {}              # file name => { id => text }
id_lst = Hash.new(0)   # id => number of matching lines seen across all files;
                       # only the key (first-seen) order is used below

filelist.each do |fn|
  dt = {}
  # Block form: the handle is closed even if reading or matching raises.
  File.open(fn, 'rb:BOM|UTF-16LE') do |f|
    f.each_line do |l|
      l.chomp!
      next if l =~ re_comment   # skip comment
      next if l =~ re_blankline # skip blank line

      m = re_word_tab_word.match(l)
      next unless m

      id = m[1]
      dt[id] = m[2] # a later line with the same id overwrites the earlier one
      id_lst[id] += 1
    end
  end
  data[fn] = dt
end

# make csv
header = ["ID".encode("UTF-16LE")] + pickuplist.map { |fn| fn.encode("UTF-16LE") }
res = [header]

id_lst.each_key do |id|
  # An id that a pickup file does not define yields an empty cell.
  res.push([id] + pickuplist.map { |fn| data[fn].fetch(id, nullstr) })
end

# output csv (UTF-16LE + BOM)
# Block form: the output handle is closed (and flushed) even on error.
File.open(output_file, 'wb:UTF-16LE') do |f|
  f.write "\uFEFF" # BOM
  res.each { |row| f.puts row.join(sep) }
end