You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

151 lines
3.5 KiB
Ruby

#! /usr/bin/env ruby
# frozen_string_literal: true

# Scraper for fantasynamegenerators.com: walks the site's nav menu,
# downloads every generator's JavaScript file, and writes a JSON index
# describing how to invoke each generator.
require 'addressable'
require 'async'
require 'async/barrier'
require 'async/http/internet'
require 'fileutils' # FIX: was relied on transitively; FileUtils is used below
require 'json'
require 'nokogiri'

LOG = Console.logger # Console is loaded by the async gem
INTERNETS = Async::HTTP::Internet.new
# Optional directory for caching downloaded pages between runs (env CACHE).
CACHE = ENV['CACHE']
# Maximum number of concurrent page downloads.
MAX_DL = 16
BASE_URL = 'https://www.fantasynamegenerators.com/'
# Output directory; wiped and recreated on every run.
OUT = File.join __dir__, 'generators'
FileUtils.rm_rf OUT
FileUtils.mkdir_p OUT
# Fetches +url+ and returns the response body as a String.
#
# When the CACHE env var is set, bodies are stored in that directory
# (with '/' mapped to '\' so the full URL works as a flat filename) and
# reused on later runs.  Raises unless the server answers 200.
def get url
# Normalise to the display form (IDN decoded, percent-unescaped).
url = Addressable::URI.parse(url).display_uri.to_s
if CACHE
fn = File.join CACHE, url.tr('/', '\\')
# Best-effort cache hit: any read error just falls through to a download.
(return File.read fn) rescue nil
end
LOG.info "Getting #{url}"
res = INTERNETS.get url, [['user-agent', 'fuck you']]
raise "Bad status #{res.status}" unless res.status == 200
body = res.read
# fn is still in scope from the branch above whenever CACHE is set.
File.write fn, body if CACHE
body
end
# Normalises a menu caption into a clean generator name, mutating +s+ in
# place: trims whitespace, drops a trailing " >" marker and any "- New!"
# badge, and expands the abbreviations "Gens." / "Descr.".  Returns s.
def mk_name s
  s.strip!
  # Strip decorations first, then expand abbreviations.
  { / +>\z/ => '', / *- *new!$/i => '' }.each do |pattern, replacement|
    s.gsub! pattern, replacement
  end
  { 'Gens.' => 'Generator', 'Descr.' => 'Description' }.each do |abbr, full|
    s.gsub! abbr, full
  end
  s
end
# href => breadcrumb path (Array of section names) for every generator
# page discovered by parse_tree.
PAGES = {}
# Recursively walks a navigation <ul>/<ol>.  +path+ is the breadcrumb of
# section names leading to +ul+.  Leaf links are recorded in PAGES as
# href => breadcrumb; submenus recurse with their caption appended.
def parse_tree path, ul
ul.xpath('./li|./ol/li').each do |li|
if ul2 = li.at_xpath('./ul|./ol')
# Submenu: the first child node of the <li> is the caption text.
x = mk_name li.children[0].inner_text
next if x == 'Contact & Support'
if ul2[:id] == 'splitNav'
ul2 = ul2.at_css 'ul#rlAll' # note: multiple elements with id=rlAll !!!
end
parse_tree path + [x], ul2
elsif a = li.at_xpath('.//a')
# Leaf link: skip hrefs containing '/', the thank-you page, and
# "More soon!" placeholders.
next if a[:href] =~ %r{/} || a[:href] == 'thankyou.php' ||
a.inner_text == 'More soon!'
if PAGES[a[:href]]
LOG.warn "Duplicate #{a}"
else
PAGES[a[:href]] = path + [mk_name(a.inner_text)]
end
else
# Every <li> is expected to be either a submenu or a link.
fail 'eek'
end
end
end
# Derives an ASCII-safe local filename from a script URL: takes the
# basename, strips any query string, then NFKD-decomposes the result and
# drops every non-ASCII character (accented letters keep their base).
def generator_name url
  stem = File.basename(url).gsub(/\?.*/, '')
  decomposed = stem.unicode_normalize(:nfkd)
  decomposed.each_char.select(&:ascii_only?).join
end
# Final index written to generators.js: :paths is a nested hash mirroring
# the site's menu hierarchy, with leaf entries describing each generator.
JSON_OUT = {
paths: {},
}
# Recursively sorts +h+ (a Hash) by key, in place, so the JSON output is
# stable across runs.  Non-Hash values are returned untouched; nested
# hashes are sorted depth-first.  Returns its argument.
def deep_sort h
  return h unless h.is_a?(Hash)
  ordered = h.sort.to_h
  # Rebuild the same Hash object so existing references stay valid.
  h.clear
  h.merge! ordered
  h.each_value { |child| deep_sort child }
  h
end
# Cloudflare "Rocket Loader" guard prepended to inline onclick handlers;
# stripped so only the real call expression is stored.
JS_SHIT = /\Aif *\(!window\.__cfRLUnblockHandlers\) *return +false; */
# Downloads one generator page: saves its beautified JS under OUT and
# records how to invoke the generator (button onclick / radio variants)
# in JSON_OUT[:paths] under the breadcrumb +path+.
def get_page url, path
html = Nokogiri::HTML5.parse get "#{BASE_URL}#{url}"
# The generator's own script is the first page-local one, excluding the
# shared savingNames.js helper.
js_url = html.css('script[src^=scripts]').map {|x| x[:src] }.
grep_v(/savingNames\.js/).first
js = get "#{BASE_URL}#{js_url}"
name = generator_name(js_url)
File.write File.join(OUT, name), js
# Reformat in place; requires the js-beautify CLI on PATH.
fail 'js-beautify' unless system 'js-beautify', '-rn', File.join(OUT, name)
# Walk/create the nested hash node for this breadcrumb.
x = JSON_OUT[:paths]
path.each do |p|
x[p] ||= {}
x = x[p]
end
# Collect non-randomize buttons as [caption, call] pairs.  The block
# params named x shadow the outer hash cursor, which is untouched here.
btns = html.css('input[type=button][onclick]').reject do |x|
x[:onclick] =~ /randomize\(\)/
end.map do |x|
# NOTE(review): /AGet / has no \A anchor -- looks like a mangled /\AGet/;
# confirm the intended caption rewrite.
[x[:value].gsub(/AGet ([a-zA-Z])/, &:upcase), x[:onclick].gsub(JS_SHIT, '')]
end
has_1 = !!html.at_css('input#firChange')
# FIXME(review): identical selector to has_1, so has_2 always equals
# has_1; a second control (e.g. input#secChange) was probably intended.
has_2 = !!html.at_css('input#firChange')
base_json = { file: name, has_1:, has_2: }
if btns.size > 1
# Multiple buttons: one entry per button, keyed by its caption.
btns.each do |(k, fun)|
x[k] = { call: fun }.merge base_json
end
return
end
if radio = html.at_css('form#radioChoice')
# Radio variants: one entry per option, seeding radio_value before the
# single button's call.  r.next is the label text following the input.
radio.css('input[type=radio]').each do |r|
call = "radio_value = #{r[:value].inspect};#{btns[0][1]}"
x[r.next.inner_text.strip] = { call: }.merge base_json
end
return
end
# Single plain button: attach metadata directly to this path node.
x.merge! base_json
x.merge! call: btns[0][1]
end
# Entry point: parse the site's nav menu into PAGES, then download every
# generator page concurrently (at most MAX_DL in flight) and write the
# sorted JSON index.
Async do |task|
html = Nokogiri::HTML5.parse get BASE_URL
parse_tree [], html.at_css('#navmenus > ul.navmenu')
barrier = Async::Barrier.new
# The semaphore caps concurrent downloads at MAX_DL.
sema = Async::Semaphore.new MAX_DL, parent: task
PAGES.each do |url, path|
# next unless url =~ /call/
barrier.async do
sema.acquire do
get_page url, path
end
end
end
# Block until every download task has finished before writing output.
barrier.wait
deep_sort JSON_OUT[:paths]
File.write File.join(OUT, 'generators.js'), JSON.pretty_generate(JSON_OUT)
end