You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
151 lines
3.5 KiB
Ruby
151 lines
3.5 KiB
Ruby
#! /usr/bin/env ruby
|
|
# frozen_string_literal: true
|
|
|
|
require 'addressable'
require 'async'
require 'async/barrier'
require 'async/http/internet'
require 'async/semaphore'
require 'fileutils'
require 'json'
require 'nokogiri'
|
|
|
|
# Logger used for progress and warning messages.
LOG = Console.logger
# Single HTTP client shared by every request.
INTERNETS = Async::HTTP::Internet.new
# Optional directory for cached downloads, read from the CACHE environment
# variable. An unset or empty value disables caching (an empty string would
# otherwise be truthy and produce a root-relative cache path).
CACHE = ENV['CACHE'].to_s.empty? ? nil : ENV['CACHE']
# Upper bound on page downloads running at the same time.
MAX_DL = 16
BASE_URL = 'https://www.fantasynamegenerators.com/'
# Output directory; wiped and recreated on every run.
OUT = File.join __dir__, 'generators'

# Writing a cache entry fails with ENOENT if the directory is missing.
FileUtils.mkdir_p CACHE if CACHE
FileUtils.rm_rf OUT
FileUtils.mkdir_p OUT
|
|
|
|
# Fetches +url+ and returns the response body.
#
# When CACHE is set, a copy stored there by an earlier call is returned
# without a network request, and every fresh download is written back.
#
# Raises RuntimeError when the response status is not 200.
def get url
  url = Addressable::URI.parse(url).display_uri.to_s
  # Cache entries are named after the URL with '/' replaced by '\'.
  fn = File.join CACHE, url.tr('/', '\\') if CACHE
  if fn
    begin
      return File.read fn
    rescue Errno::ENOENT
      # Cache miss: fall through to the download. Other I/O errors propagate
      # instead of being silently swallowed.
    end
  end

  LOG.info "Getting #{url}"
  res = INTERNETS.get url, [['user-agent', 'fuck you']]
  begin
    raise "Bad status #{res.status}" unless res.status == 200

    body = res.read
  ensure
    # Release the response even when the status check raises.
    res.close
  end
  File.write fn, body if fn
  body
end
|
|
|
|
# Cleans up a menu label and returns the result as a new String.
#
# Steps, in order: strip surrounding whitespace, drop a trailing " >",
# drop a trailing "- new!" suffix (case-insensitive), then expand the
# abbreviations "Gens." and "Descr.".
#
# The argument is never modified, so frozen strings are accepted.
def mk_name s
  s.strip
   .gsub(%r{ +>\z}, '')
   .gsub(%r{ *- *new!$}i, '')
   .gsub('Gens.', 'Generator')
   .gsub('Descr.', 'Description')
end
|
|
|
|
# Pages found by parse_tree: link href => list of menu names leading to the page.
PAGES = {}
|
|
|
|
# Walks the list element +ul+ recursively and registers every linked page in
# PAGES as href => path + [page name].
#
# +path+ holds the names of the enclosing menu levels. Items containing a
# nested list are descended into; items containing a link are recorded unless
# the href contains a slash, is 'thankyou.php', or the link text is
# 'More soon!'. An href seen before is only logged as a duplicate.
#
# Raises when an item contains neither a nested list nor a link.
def parse_tree path, ul
  ul.xpath('./li|./ol/li').each do |item|
    sublist = item.at_xpath('./ul|./ol')
    if sublist
      category = mk_name item.children[0].inner_text
      next if category == 'Contact & Support'

      # note: multiple elements with id=rlAll !!!
      sublist = sublist.at_css 'ul#rlAll' if sublist[:id] == 'splitNav'
      parse_tree path + [category], sublist
      next
    end

    link = item.at_xpath('.//a')
    fail 'eek' unless link

    href = link[:href]
    next if href =~ %r{/}
    next if href == 'thankyou.php'
    next if link.inner_text == 'More soon!'

    if PAGES[href]
      LOG.warn "Duplicate #{link}"
    else
      PAGES[href] = path + [mk_name(link.inner_text)]
    end
  end
end
|
|
|
|
# Derives a local file name from a script URL: takes the basename, cuts off
# everything from the first '?' on, and keeps only the ASCII characters left
# after NFKD normalisation.
def generator_name url
  base = File.basename(url).gsub(/\?.*/, '')
  decomposed = base.unicode_normalize(:nfkd)
  decomposed.each_char.select { |ch| ch.ascii_only? }.join
end
|
|
|
|
# Document serialised to the output file at the end of the run; :paths is the
# nested tree that get_page fills in.
JSON_OUT = { paths: {} }
|
|
|
|
# Sorts the hash +h+ by key, in place, and does the same for every nested
# hash value. Non-hash arguments are returned untouched.
#
# Keys are compared with <=>; keys that cannot be compared with each other
# (for example a String and a Symbol) fall back to comparing their string
# forms instead of raising ArgumentError.
#
# Returns +h+ itself.
def deep_sort h
  return h unless h.is_a? Hash

  sorted = h.sort do |(left, _), (right, _)|
    (left <=> right) || (left.to_s <=> right.to_s)
  end
  # replace keeps the object identity, so callers holding +h+ see the new order.
  h.replace sorted.to_h
  h.each_value { |value| deep_sort value }
end
|
|
|
|
# Guard statement stripped from the start of button onclick handlers before
# they are stored (presumably injected by Cloudflare Rocket Loader — unverified).
JS_SHIT = /\Aif *\(!window\.__cfRLUnblockHandlers\) *return +false; */
|
|
|
|
# Downloads the page +url+ and its generator script, stores the script under
# OUT (then runs js-beautify on it), and records how to call the generator in
# JSON_OUT[:paths] under the nested keys listed in +path+.
#
# What is stored at that node depends on the page:
# * several buttons (ignoring randomize() ones): one child entry per button;
# * a form#radioChoice: one child entry per radio input;
# * otherwise: the call details directly on the node.
#
# Raises when the page has no generator script or no usable button, and when
# js-beautify fails.
def get_page url, path
  html = Nokogiri::HTML5.parse get "#{BASE_URL}#{url}"
  js_url = html.css('script[src^=scripts]').map { |script| script[:src] }
               .grep_v(/savingNames\.js/).first
  raise "No generator script found on #{url}" unless js_url

  js = get "#{BASE_URL}#{js_url}"
  name = generator_name(js_url)
  js_file = File.join(OUT, name)
  File.write js_file, js
  fail 'js-beautify' unless system 'js-beautify', '-rn', js_file

  # Walk down to the node for this page, creating missing levels on the way.
  node = path.reduce(JSON_OUT[:paths]) { |level, key| level[key] ||= {} }

  btns = html.css('input[type=button][onclick]').reject do |btn|
    btn[:onclick] =~ /randomize\(\)/
  end.map do |btn|
    # NOTE(review): this pattern has no \A anchor, so it only matches the
    # literal text "AGet " and labels pass through unchanged. Possibly
    # /\AGet .../ was intended; kept as-is because the emitted keys depend on it.
    [btn[:value].gsub(/AGet ([a-zA-Z])/, &:upcase), btn[:onclick].gsub(JS_SHIT, '')]
  end
  raise "No generator button found on #{url}" if btns.empty?

  has_1 = !!html.at_css('input#firChange')
  # NOTE(review): same selector as has_1 - looks like a copy/paste slip, but
  # the intended element id is not visible here; confirm against the site.
  has_2 = !!html.at_css('input#firChange')
  base_json = { file: name, has_1:, has_2: }

  if btns.size > 1
    btns.each do |(label, fun)|
      node[label] = { call: fun }.merge base_json
    end
    return
  end

  default_call = btns[0][1]

  if radio = html.at_css('form#radioChoice')
    radio.css('input[type=radio]').each do |r|
      call = "radio_value = #{r[:value].inspect};#{default_call}"
      # The entry is keyed by the text of the node following the radio input.
      node[r.next.inner_text.strip] = { call: }.merge base_json
    end
    return
  end

  node.merge! base_json
  node.merge! call: default_call
end
|
|
|
|
# Entry point: read the navigation menu, fetch every page found there with at
# most MAX_DL downloads running at once, then write the sorted index.
Async do |task|
  html = Nokogiri::HTML5.parse get BASE_URL
  menu = html.at_css('#navmenus > ul.navmenu')
  raise 'Navigation menu not found' unless menu

  parse_tree [], menu

  barrier = Async::Barrier.new
  sema = Async::Semaphore.new MAX_DL, parent: task
  begin
    PAGES.each do |url, path|
      # next unless url =~ /call/
      barrier.async do
        sema.acquire do
          get_page url, path
        end
      end
    end
    barrier.wait
  ensure
    # If one download raised, stop the ones still running instead of letting
    # them continue in the background.
    barrier.stop
  end

  deep_sort JSON_OUT[:paths]
  File.write File.join(OUT, 'generators.js'), JSON.pretty_generate(JSON_OUT)
ensure
  # Release the shared client's connections whether or not the run succeeded.
  INTERNETS.close
end
|