update_generators.rb (3631B)
#! /usr/bin/env ruby
# frozen_string_literal: true

require 'addressable'
require 'async'
require 'async/barrier'
require 'async/http/internet'
require 'async/semaphore'
require 'console'
require 'fileutils'
require 'json'
require 'nokogiri'

LOG = Console.logger
INTERNETS = Async::HTTP::Internet.new
CACHE = ENV['CACHE']
MAX_DL = 16
BASE_URL = 'https://www.fantasynamegenerators.com/'
OUT = File.join __dir__, 'generators'

FileUtils.rm_rf OUT
FileUtils.mkdir_p OUT

# Fetch a URL, reading from / writing to the CACHE directory when it is set.
def get url
  url = Addressable::URI.parse(url).display_uri.to_s
  if CACHE
    fn = File.join CACHE, url.tr('/', '\\')
    (return File.read fn) rescue nil
  end
  LOG.info "Getting #{url}"
  res = INTERNETS.get url, [['user-agent', 'fuck you']]
  raise "Bad status #{res.status}" unless res.status == 200
  body = res.read
  File.write fn, body if CACHE
  body
end

# Normalise a nav-menu label into a readable generator name.
def mk_name s
  s.strip!
  s.gsub! %r{ +>\z}, ''
  s.gsub! %r{ *- *new!$}i, ''
  s.gsub! 'Gens.', 'Generator'
  s.gsub! 'Descr.', 'Description'
  s
end

PAGES = {}

# Walk the nested nav menu, mapping each leaf link's href to its menu path in PAGES.
def parse_tree path, ul
  ul.xpath('./li|./ol/li').each do |li|
    if ul2 = li.at_xpath('./ul|./ol')
      x = mk_name li.children[0].inner_text
      next if x == 'Contact & Support'
      if ul2[:id] == 'splitNav'
        ul2 = ul2.at_css 'ul#rlAll' # note: multiple elements with id=rlAll !!!
      end
      parse_tree path + [x], ul2
    elsif a = li.at_xpath('.//a')
      next if a[:href] =~ %r{/} || a[:href] == 'thankyou.php' ||
              a.inner_text == 'More soon!'
      if PAGES[a[:href]]
        LOG.warn "Duplicate #{a}"
      else
        PAGES[a[:href]] = path + [mk_name(a.inner_text)]
      end
    else
      fail 'eek'
    end
  end
end

# Derive an ASCII-only output file name from the generator's script URL.
def generator_name url
  File.basename(url).gsub(/\?.*/, '').unicode_normalize(:nfkd).chars.
    select(&:ascii_only?).join
end

JSON_OUT = {
  paths: {},
}

# Recursively sort hash keys in place so the generated JSON is stable.
def deep_sort h
  return h unless h.is_a? Hash
  h2 = h.sort.to_h
  h.clear
  h.merge! h2

  h.each {|k,v| deep_sort v }
end

# Cloudflare "rocket loader" prelude injected into onclick handlers.
JS_SHIT = /\Aif *\(!window\.__cfRLUnblockHandlers\) *return +false; */

# Download one generator page, save its beautified JS, and record how to call it in JSON_OUT.
def get_page url, path
  html = Nokogiri::HTML5.parse get "#{BASE_URL}#{url}"
  js_url = html.css('script[src^=scripts]').map {|x| x[:src] }.
    grep_v(/savingNames\.js/).first
  js = get "#{BASE_URL}#{js_url}"
  name = generator_name(js_url)
  File.write File.join(OUT, name), js
  fail 'js-beautify' unless system 'js-beautify', '-rn', File.join(OUT, name)

  # Descend into the nested paths hash, creating intermediate levels as needed.
  x = JSON_OUT[:paths]
  path.each do |p|
    x[p] ||= {}
    x = x[p]
  end

  btns = html.css('input[type=button][onclick]').reject do |x|
    x[:onclick] =~ /randomize\(\)/
  end.map do |x|
    [x[:value].gsub(/AGet ([a-zA-Z])/, &:upcase), x[:onclick].gsub(JS_SHIT, '')]
  end

  has_1 = !!html.at_css('input#firChange')
  # NOTE: same selector as has_1; this presumably should check the page's
  # second name field, but the intended id isn't known from this script.
  has_2 = !!html.at_css('input#firChange')
  base_json = { file: name, has_1:, has_2: }

  # Multiple buttons: one entry per button.
  if btns.size > 1
    btns.each do |(k, fun)|
      x[k] = { call: fun }.merge base_json
    end
    return
  end

  # Radio-choice pages: one entry per radio option, setting radio_value before the call.
  if radio = html.at_css('form#radioChoice')
    radio.css('input[type=radio]').each do |r|
      call = "radio_value = #{r[:value].inspect};#{btns[0][1]}"
      x[r.next.inner_text.strip] = { call: }.merge base_json
    end
    return
  end

  # Single button, no radios.
  x.merge! base_json
  x.merge! call: btns[0][1]
end

# Crawl the index, then fetch every generator page with bounded concurrency.
Async do |task|
  html = Nokogiri::HTML5.parse get BASE_URL
  parse_tree [], html.at_css('#navmenus > ul.navmenu')
  barrier = Async::Barrier.new
  sema = Async::Semaphore.new MAX_DL, parent: task
  PAGES.each do |url, path|
    # next unless url =~ /call/
    barrier.async do
      sema.acquire do
        get_page url, path
      end
    end
  end
  barrier.wait

  deep_sort JSON_OUT[:paths]
  File.write File.join(OUT, 'generators.js'), JSON.pretty_generate(JSON_OUT)
end
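
# For reference, a sketch of the JSON written to generators.js, inferred from
# get_page above. The menu path, file name, and call string below are
# hypothetical examples; only the key names (paths, file, has_1, has_2, call)
# come from this script.
#
#   {
#     "paths": {
#       "Fantasy Names": {
#         "Elf Names": {
#           "file": "elfnames.js",
#           "has_1": true,
#           "has_2": true,
#           "call": "nameGen()"
#         }
#       }
#     }
#   }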