aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Petter Rodhelind <petter.rodhelind@gmail.com> 2021-02-21 18:31:28 +0100
committer Petter Rodhelind <petter.rodhelind@gmail.com> 2021-02-21 18:31:28 +0100
commit 3c548ab524e30267e03d5050b30f75d709027bdc (patch)
tree 8d147fd36427ce352beb43a118862c7582ba8039
parent cf8228f54b818691bf693b8a0773ca4981dbf138 (diff)
download fbfeed-3c548ab524e30267e03d5050b30f75d709027bdc.tar.gz
fbfeed-3c548ab524e30267e03d5050b30f75d709027bdc.tar.bz2
fbfeed-3c548ab524e30267e03d5050b30f75d709027bdc.zip
Fix newlines by parsing paragraphs better.
-rw-r--r-- parser.go 29
1 files changed, 21 insertions, 8 deletions
diff --git a/parser.go b/parser.go
index 8bae5a9..58ce9bf 100644
--- a/parser.go
+++ b/parser.go
@@ -87,17 +87,18 @@ func parseNode(n *html.Node, p *post) {
}
}
}
+
+ // parse paragraphs
+ if n.Data == "p" {
+ parseParagraph(n, p)
+ p.Content += "\n\n"
+ return
+ }
}
- // fetch all pure text elements
+ // fetch all pure text elements outside proper post paragraphs
if n.Type == html.TextNode {
- nl := "\n" // newlines
- if n.Parent.Type == html.ElementNode && n.Parent.Data == "p" {
- nl = "\n\n"
- }
- if n.Data != "..." {
- p.Content += strings.Trim(n.Data, " ") + nl
- }
+ p.Content += n.Data + "\n"
}
// loop deeper
@@ -105,3 +106,15 @@ func parseNode(n *html.Node, p *post) {
parseNode(c, p)
}
}
+
+func parseParagraph(n *html.Node, p *post) {
+ // Append this node's text verbatim when it is a text node: no trimming and no separator, so adjacent fragments run together.
+ if n.Type == html.TextNode {
+ p.Content += n.Data
+ }
+
+ // Visit every child in sibling order, recursing so that text at any nesting depth below n is collected.
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ parseParagraph(c, p)
+ }
+}