diff options
author | Petter Rodhelind <petter.rodhelind@gmail.com> | 2021-02-21 18:31:28 +0100 |
---|---|---|
committer | Petter Rodhelind <petter.rodhelind@gmail.com> | 2021-02-21 18:31:28 +0100 |
commit | 3c548ab524e30267e03d5050b30f75d709027bdc (patch) | |
tree | 8d147fd36427ce352beb43a118862c7582ba8039 | |
parent | cf8228f54b818691bf693b8a0773ca4981dbf138 (diff) | |
download | fbfeed-3c548ab524e30267e03d5050b30f75d709027bdc.tar.gz fbfeed-3c548ab524e30267e03d5050b30f75d709027bdc.tar.bz2 fbfeed-3c548ab524e30267e03d5050b30f75d709027bdc.zip |
Fix newlines by parsing paragraphs better.
-rw-r--r-- | parser.go | 29 |
1 files changed, 21 insertions, 8 deletions
```diff
@@ -87,17 +87,18 @@ func parseNode(n *html.Node, p *post) {
 				}
 			}
 		}
+
+		// parse paragraphs
+		if n.Data == "p" {
+			parseParagraph(n, p)
+			p.Content += "\n\n"
+			return
+		}
 	}
 
-	// fetch all pure text elements
+	// fetch all pure text elements outside proper post paragraphs
 	if n.Type == html.TextNode {
-		nl := "\n" // newlines
-		if n.Parent.Type == html.ElementNode && n.Parent.Data == "p" {
-			nl = "\n\n"
-		}
-		if n.Data != "..." {
-			p.Content += strings.Trim(n.Data, " ") + nl
-		}
+		p.Content += n.Data + "\n"
 	}
 
 	// loop deeper
@@ -105,3 +106,15 @@ func parseNode(n *html.Node, p *post) {
 		parseNode(c, p)
 	}
 }
+
+func parseParagraph(n *html.Node, p *post) {
+	// fetch all pure text elements
+	if n.Type == html.TextNode {
+		p.Content += n.Data
+	}
+
+	// loop deeper
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		parseParagraph(c, p)
+	}
+}
```