aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Petter Rodhelind <petter.rodhelind@gmail.com> 2021-02-22 00:42:08 +0100
committer Petter Rodhelind <petter.rodhelind@gmail.com> 2021-02-22 00:42:08 +0100
commit 449f35d4fd50cae9b4c1fe4dd4b7003fd4e7b8e6 (patch)
tree dad32ff6bfbe07cb8454e3cb8e90112be4379d34
parent b04e3bc0595c88303a87858108a938eb62bd35cd (diff)
download fbfeed-449f35d4fd50cae9b4c1fe4dd4b7003fd4e7b8e6.tar.gz
fbfeed-449f35d4fd50cae9b4c1fe4dd4b7003fd4e7b8e6.tar.bz2
fbfeed-449f35d4fd50cae9b4c1fe4dd4b7003fd4e7b8e6.zip
Parse links and get better at hiding stuff.
-rw-r--r-- parser.go 45
1 files changed, 43 insertions, 2 deletions
diff --git a/parser.go b/parser.go
index 4d3720b..bf9e837 100644
--- a/parser.go
+++ b/parser.go
@@ -2,6 +2,7 @@ package main
import (
"io"
+ "net/url"
"strconv"
"strings"
"time"
@@ -76,14 +77,20 @@ func parseNode(n *html.Node, p *post) {
case "h5":
return
- // skip "show more" and aria-hidden stuff
+ // skip "show more", aria-hidden and accessible stuff
case "span":
fallthrough
case "a":
+ fallthrough
+ case "div":
for _, attr := range n.Attr {
if attr.Key == "class" && attr.Val == "text_exposed_hide" {
return
}
+ if attr.Key == "class" && strings.Contains(attr.Val, "accessible_elem") {
+ return
+ }
+
if attr.Key == "aria-hidden" && attr.Val == "true" {
return
}
@@ -104,7 +111,11 @@ func parseNode(n *html.Node, p *post) {
// fetch all pure text elements outside proper post paragraphs
if n.Type == html.TextNode {
- p.Content += n.Data + "\n"
+ p.Content += n.Data
+ if n.Parent.Type == html.ElementNode && n.Parent.Data == "a" {
+ parseLink(n.Parent, p)
+ }
+ p.Content += "\n"
}
// loop deeper
@@ -117,6 +128,16 @@ func parseParagraph(n *html.Node, p *post) {
// fetch all pure text elements
if n.Type == html.TextNode {
p.Content += n.Data
+ if n.Parent.Type == html.ElementNode && n.Parent.Data == "a" {
+ parseLink(n.Parent, p)
+ return
+ }
+ }
+
+ // normal linebreak within paragraphs
+ if n.Type == html.ElementNode && n.Data == "br" {
+ p.Content += "\n"
+ return
}
// loop deeper
@@ -141,3 +162,23 @@ func parseImage(n *html.Node, p *post) {
}
p.Images = append(p.Images, img)
}
+
+// parseLink appends the target of the anchor element n to p.Content in the
+// form " (<target>)", once for every href attribute found on n.
+//
+// If the href carries a non-empty "u" query parameter, its decoded value is
+// used as the target. Otherwise the target is the href with everything from
+// the first "?" removed, and "https://www.facebook.com" is prepended when it
+// starts with "/". An href with nothing before the "?" adds nothing.
+func parseLink(n *html.Node, p *post) {
+	for _, attr := range n.Attr {
+		if attr.Key != "href" {
+			continue
+		}
+		href := attr.Val
+
+		// Parse the href as written: Query() already decodes the value, and
+		// unescaping first would turn an encoded "&" inside the "u" value
+		// into a parameter separator and truncate the target.
+		// A parse error is not fatal; fall through to the plain-path case.
+		if parsed, err := url.Parse(href); err == nil {
+			if external := parsed.Query().Get("u"); external != "" {
+				p.Content += " (" + external + ")"
+				continue
+			}
+		}
+
+		base := href
+		if i := strings.IndexByte(base, '?'); i >= 0 {
+			base = base[:i]
+		}
+		// Keep the raw text when it contains an invalid escape sequence
+		// instead of discarding the link.
+		if unescaped, err := url.PathUnescape(base); err == nil {
+			base = unescaped
+		}
+		if base == "" {
+			// Nothing to show (empty or query-only href); indexing base[0]
+			// here would panic.
+			continue
+		}
+		if strings.HasPrefix(base, "/") {
+			base = "https://www.facebook.com" + base
+		}
+		p.Content += " (" + base + ")"
+	}
+}