gen_wordlist.rb (2621B)
#!/usr/bin/env ruby
# frozen_string_literal: true

# Generates a source file containing four word lists (ADVERBS, ADJECTIVES,
# NOUNS, VERBS) and, for Ruby and Lua, a small random-name generator that
# picks one word from each list plus a four-digit number.
#
# Usage: gen_wordlist.rb language out_file
#   language - 'c++' / 'cxx', 'ruby' or 'lua'
#   out_file - path of the file to (over)write
#
# The lists are read from adverb_list.csv, adjective_list.csv, noun_list.csv
# and verb_list.csv, resolved relative to the current working directory.

require 'csv'

if ARGV.size != 2
  $stderr.puts "Usage: #{$0} language out_file"
  exit 1
end

# Per-language templates:
#   prefix / suffix             - text emitted before / after all word lists
#   array_prefix / array_suffix - text wrapped around each list; array_prefix
#                                 is a format string receiving the list name
case ARGV[0]
when 'c++', 'cxx'
  prefix = ''
  suffix = ''
  array_prefix = 'constexpr const char* %s[] = { '
  array_suffix = ' };'
when 'ruby'
  prefix = '# frozen_string_literal: true'
  suffix = <<'EOS'
require 'securerandom'

def udcrr
  adv = ADVERBS[SecureRandom.random_number ADVERBS.size]
  adj = ADJECTIVES[SecureRandom.random_number ADJECTIVES.size]
  noun = NOUNS[SecureRandom.random_number NOUNS.size]
  verb = VERBS[SecureRandom.random_number VERBS.size]
  num = SecureRandom.random_number 10000
  "%s-%s-%s-%s-%04d" % [adv, adj, noun, verb, num]
end
EOS
  array_prefix = '%s = [ '
  array_suffix = ' ].freeze'
when 'lua'
  prefix = ''
  # The Lua tables are emitted 0-based ("{[0]= ..."), so the element count is
  # "#T + 1" because the length operator only counts from index 1.
  suffix = <<'EOS'
local adv_size = #ADVERBS+1
local adj_size = #ADJECTIVES+1
local noun_size = #NOUNS+1
local verb_size = #VERBS+1
local floor, format = math.floor, string.format

local ffi = require("ffi")
ffi.cdef("int RAND_bytes(unsigned char *buf, int num);")
local ssl = ffi.load("crypto")

local num = ffi.new("uint32_t[1]")
local arg = ffi.cast("unsigned char*", num)
local function rand(max)
  local trunc = floor(0xffffffff / max) * max
  repeat
    assert(ssl.RAND_bytes(arg, 4) == 1)
  until num[0] < trunc
  return num[0] % max
end

return function ()
  local adv = ADVERBS[rand(adv_size)]
  local adj = ADJECTIVES[rand(adj_size)]
  local noun = NOUNS[rand(noun_size)]
  local verb = VERBS[rand(verb_size)]
  local num = rand(10000)
  return format("%s-%s-%s-%s-%04d", adv, adj, noun, verb, num)
end
EOS
  array_prefix = 'local %s = {[0]= '
  array_suffix = '}'
else
  raise "Unknown language"
end

# Writes one word list to +out+ and prints "<count> <name>" to stdout.
#
# Defined with define_method (not def) on purpose: the block must close over
# the array_prefix / array_suffix locals selected above, which a def body
# could not see.
#
# fname - path of the CSV file to read; the word is taken from the second
#         column and the first row is skipped (presumably the CSV header)
# name  - identifier of the generated array
# out   - IO-like object responding to #write and #puts
define_method :gen_list do |fname, name, out|
  words = File.foreach(fname).lazy
              # Turn backslash-escaped quotes into CSV-style doubled quotes so
              # that the line parses, then take the second column.
              .map { |line| CSV.parse_line(line.gsub('\"', '""'))[1] }
              .drop(1)
              # Keep only non-empty words made of ASCII alphanumerics, '_',
              # "'" and '-'. Rows without a second column yield nil, for which
              # =~ returns nil, so they are dropped here as well.
              .select { |word| word =~ /\A[0-9a-zA-Z_'-]+\z/ }
              .map { |word| word.tr("_'", '-').downcase }
              .force
  out.write(array_prefix % name)
  out.write(words.map(&:inspect).join(', '))
  out.puts array_suffix
  puts "#{words.size} #{name}"
end

# Block form guarantees the output file is flushed and closed, even when one
# of the input lists is missing or malformed.
File.open(ARGV[1], 'w') do |f|
  f.puts prefix

  # wordlist from wiktionary
  # https://petscan.wmflabs.org/?psid=3899124&format=csv
  gen_list 'adverb_list.csv', 'ADVERBS', f
  # https://petscan.wmflabs.org/?psid=3864045&format=csv
  gen_list 'adjective_list.csv', 'ADJECTIVES', f
  # https://petscan.wmflabs.org/?psid=3864050&format=csv
  gen_list 'noun_list.csv', 'NOUNS', f
  # base verb forms, not used https://petscan.wmflabs.org/?psid=3899120&format=csv
  # https://petscan.wmflabs.org/?psid=3900357&format=csv
  gen_list 'verb_list.csv', 'VERBS', f

  f.puts suffix
end