From 3c548ab524e30267e03d5050b30f75d709027bdc Mon Sep 17 00:00:00 2001
From: Petter Rodhelind
Date: Sun, 21 Feb 2021 18:31:28 +0100
Subject: Fix newlines by parsing paragraphs better.

---
 parser.go | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/parser.go b/parser.go
index 8bae5a9..58ce9bf 100644
--- a/parser.go
+++ b/parser.go
@@ -87,17 +87,18 @@ func parseNode(n *html.Node, p *post) {
 				}
 			}
 		}
+
+		// parse paragraphs
+		if n.Data == "p" {
+			parseParagraph(n, p)
+			p.Content += "\n\n"
+			return
+		}
 	}
 
-	// fetch all pure text elements
+	// fetch all pure text elements outside proper post paragraphs
 	if n.Type == html.TextNode {
-		nl := "\n" // newlines
-		if n.Parent.Type == html.ElementNode && n.Parent.Data == "p" {
-			nl = "\n\n"
-		}
-		if n.Data != "..." {
-			p.Content += strings.Trim(n.Data, " ") + nl
-		}
+		p.Content += n.Data + "\n"
 	}
 
 	// loop deeper
@@ -105,3 +106,15 @@ func parseNode(n *html.Node, p *post) {
 		parseNode(c, p)
 	}
 }
+
+func parseParagraph(n *html.Node, p *post) {
+	// fetch all pure text elements
+	if n.Type == html.TextNode {
+		p.Content += n.Data
+	}
+
+	// loop deeper
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		parseParagraph(c, p)
+	}
+}
-- 
cgit v1.2.3