# -*- mode: ruby; encoding: utf-8 -*-
require 'uri'
require 'date'
require 'yaml'
# Text-escaping helpers used by the PukiWiki converter.
#
# Every method can be used as a mix-in instance method (`include HTMLUtils`)
# or called directly on the module (`HTMLUtils.urldecode(...)`).
module HTMLUtils
  # Exposes the methods on the module itself while keeping the mix-in
  # versions public, so both calling styles work.
  extend self

  # Characters that must be replaced by entities in HTML text and attributes.
  ESC = {
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;'
  }.freeze

  # Returns a copy of +str+ with &, ", < and > replaced by HTML entities.
  def escape(str)
    str.gsub(/[&"<>]/u, ESC)
  end

  # Characters that must be replaced by entities inside code text
  # (double quotes are left untouched).
  CODE = {
    '<' => '&lt;',
    '>' => '&gt;',
    '&' => '&amp;'
  }.freeze

  # Returns a copy of +str+ with <, > and & replaced by HTML entities.
  def code_escape(str)
    str.gsub(/[<>&]/u, CODE)
  end

  # Characters percent-encoded by #uri_encode.
  URIENC = {
    '(' => '%28',
    ')' => '%29',
    ' ' => '%20'
  }.freeze

  # Returns a copy of +str+ with parentheses and spaces percent-encoded.
  # No other character is changed.
  def uri_encode(str)
    str.gsub(/[\(\) ]/u, URIENC)
  end

  # Replaces every pair of hex digits (0-9, upper-case A-F) in +str+ with the
  # single byte it denotes; all other characters are copied through.
  # Note that no '%' prefix is involved: "466F6F.txt" becomes "Foo.txt".
  def urldecode(str)
    str.gsub(/[A-F\d]{2}/u) { |pair| [pair.hex].pack('C*') }
  end
end
class PukiWikiParser
include HTMLUtils
# Creates a parser.
#
# h_start_level - number of '#' characters emitted for a single-'*' heading;
#                 each additional '*' adds one more '#'. Defaults to 2.
def initialize(h_start_level = 2)
  @h_start_level = h_start_level
end
# Derives the Markdown output file name for a PukiWiki data file.
#
# pw_name - data file name whose hex-digit pairs encode the page name,
#           e.g. "5377696E672F466F6F.txt" for "Swing/Foo.txt".
#
# The name is decoded, its first ':' is replaced by '_', it is lower-cased,
# only the part after the last '/' is kept, and a trailing ".txt" becomes
# ".md". When @timestamp is set and non-empty, the result is prefixed with
# "<timestamp>-".
#
# Returns the file name as a String.
def filename(pw_name)
  # urldecode comes from the included HTMLUtils module; the module itself is
  # not guaranteed to respond to it, so call the mixed-in instance method.
  decoded_name = urldecode(pw_name).sub(/\:/, '_').downcase.split("/").last
  name = decoded_name.sub(/\.txt\z/, '.md')
  return name if @timestamp.nil? || @timestamp.empty?

  "#{@timestamp}-#{name}"
end
# Tells whether a publication date is currently recorded in @timestamp.
#
# Returns false when @timestamp is nil (nothing converted yet) or empty,
# matching the way #filename decides whether to add a date prefix;
# returns true otherwise.
def has_pubdate
  !(@timestamp.nil? || @timestamp.empty?)
end
# Converts the source text of one PukiWiki page to Markdown.
#
# src        - raw page text; may start with a front matter section
#              delimited by "---" lines.
# page_names - stored in @page_names.
# page       - page path; "Swing/<name>.txt" is reduced to "<name>" and
#              stored in @page, any other value is stored unchanged.
# base_uri   - stored in @base_uri.
# suffix     - stored in @pagelist_suffix.
#
# When front matter is present, its lines are copied into the output header
# between "layout: post" and "comments: true". A "pubdate: " line also sets
# @timestamp to that date formatted as YYYY-MM-DD; a "noindex: " line aborts
# the conversion.
#
# Returns the converted text as a String, or '' for a "noindex" page.
# An unparsable pubdate value makes DateTime.parse raise.
def to_md(src, page_names, page, base_uri = 'https://ateraimemo.com/', suffix = '/')
  @page_names = page_names
  @base_uri = base_uri
  @page = page.sub(/\ASwing\/(.+)\.txt\z/) { $1 }
  @pagelist_suffix = suffix
  @inline_re = nil
  @timestamp = ''
  head = []
  buf = []
  @FRONT_MATTER_REGEX ||= %r<
    \A---[\r\n](.*?)[\r\n]---[\r\n](.*)
  >mx
  if @FRONT_MATTER_REGEX =~ src.lstrip
    frontmatter = $1
    body = $2
    # The front matter is passed through line by line as plain text; it is
    # not parsed as YAML.
    head.push("---")
    head.push("layout: post")
    frontmatter.rstrip.split(/\r?\n/).map(&:chomp).each do |entry|
      case entry
      when /\Apubdate: /
        head.push entry
        @timestamp = DateTime.parse(entry.sub(/\Apubdate: /, '')).strftime('%Y-%m-%d')
      when /\Anoindex: /
        # Pages flagged noindex produce no output at all.
        @timestamp = ''
        return ''
      else
        head.push entry
      end
    end
    head.push("comments: true")
    head.push("---\n")
  else
    body = src
  end

  lines = body.rstrip.split(/\r?\n/).map(&:chomp)
  # Each branch consumes at least the first remaining line.
  until lines.empty?
    case lines.first
    when ''
      buf.push lines.shift
    when /\A----/
      lines.shift
      buf.push '- - - -' #hr
    when /\A\*/
      buf.push parse_h(lines.shift)
    when /\A\#code.*\{\{/
      buf.concat parse_pre2(take_multi_block(lines))
    when /\A\#.+/
      bp = parse_block_plugin(lines.shift)
      buf.push bp unless bp.nil?
    when /\A\s/
      buf.concat parse_pre(take_block(lines, /\A\s/))
    when /\A\/\//
      # Comment lines are dropped from the output.
      take_block(lines, /\A\/\//)
    when /\A>/
      buf.concat parse_quote(take_block(lines, /\A>/))
    when /\A-/
      buf.concat parse_list('ul', take_list_block(lines))
    when /\A\+/
      buf.concat parse_list('ol', take_block(lines, /\A\+/))
    when /\A:/
      buf.concat parse_dl(take_block(lines, /\A:/))
    else
      paragraph = take_block(lines, /\A(?![*\s>:\-\+\#]|----|\z)/)
      # A line consisting of a lone "#" reaches this branch but is rejected
      # by the paragraph marker, so nothing would be consumed and the loop
      # would never end. Treat such a line as paragraph text.
      paragraph.push(lines.shift) if paragraph.empty?
      buf.concat parse_p(paragraph)
    end
  end
  head.join("\n").concat(buf.join("\n"))
end
private
# Removes the leading run of lines that match +marker+ from +lines+ and
# returns them with the matched marker text stripped.
#
# Lines in that run that start with "//" are removed from +lines+ as well
# but are left out of the result. +lines+ is modified in place.
def take_block(lines, marker)
  run_length = lines.take_while { |candidate| marker =~ candidate }.size
  lines.shift(run_length)
       .reject { |taken| /\A\/\// =~ taken }
       .map { |taken| taken.sub(marker, '') }
end
# Consumes lines from +lines+ up to and including the first line that is
# exactly "}}" (or all lines when there is none) and returns the consumed
# lines, minus the terminator and minus any line starting with "#code".
# +lines+ is modified in place.
def take_multi_block(lines)
  terminator_at = lines.index { |text| /\A\}\}\z/ =~ text }
  consumed =
    if terminator_at
      lines.shift(terminator_at + 1).tap(&:pop)
    else
      lines.shift(lines.size)
    end
  consumed.reject { |text| /\A\#code.*\z/ =~ text }
end
# Handles a heading line (one that starts with '*').
#
# The heading depth is @h_start_level plus the number of leading '*'
# characters (at most four are counted) minus one, and `h` holds that many
# '#' characters.
#
# NOTE(review): everything after the `h = ...` assignment appears to be
# corrupted in this copy of the file (stray string fragments, an `elsif`
# without an opening `if`, use of a `buf` that is not defined here). The code
# is left untouched; restore it from the original source before relying on it.
def parse_h(line)
level = @h_start_level + (line.slice(/\A\*{1,4}/).length - 1)
h = "#"*level
# content = line.sub(/\A\*+/, '')
##content = line.gsub(/\A\*+(.+) \[#\w+\]$/) { $1 }
#"
|.concat(line), array.join("\n"), "
"]
blockflag = false
elsif line.start_with?('#') then
unless blockflag then
blockflag = true
buf.push h
end
x = "\t"*2
line = code_escape(line.sub(/\A\#\s/, ''))
buf.push "#{h}#{x}#{line}"
elsif line.start_with?('']
end
# Converts paragraph lines by passing each one through #parse_inline.
# Returns the converted lines as an Array, in the same order.
def parse_p(lines)
  lines.map(&method(:parse_inline))
end
def parse_inline(str)
str = str.gsub(/%%(?!%)((?:(?!%%).)*)%%/) { ['~~', $1, '~~'].join() } #nest: