aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Petter Rodhelind <petter.rodhelind@gmail.com> 2021-02-21 14:39:50 +0100
committer Petter Rodhelind <petter.rodhelind@gmail.com> 2021-02-21 14:39:50 +0100
commit dc79f4c18a76f5f907b3061e2e08ae4923da40e2 (patch)
tree ae0e93c94b2a9959cd2f3d6ae01a30c59e3f2930
parent 946f97ae713c019196c115f87bf91752c44f9578 (diff)
download fbfeed-dc79f4c18a76f5f907b3061e2e08ae4923da40e2.tar.gz
fbfeed-dc79f4c18a76f5f907b3061e2e08ae4923da40e2.tar.bz2
fbfeed-dc79f4c18a76f5f907b3061e2e08ae4923da40e2.zip
Functional parser printing plaintext to stdout.
-rw-r--r-- main.go 44
-rw-r--r-- parser.go 67
2 files changed, 111 insertions, 0 deletions
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..51f17e6
--- /dev/null
+++ b/main.go
@@ -0,0 +1,44 @@
+package main
+
+import (
+ "fmt"
+ "net/http"
+ "time"
+)
+
+// post is one entry extracted from a fetched page. Both fields are filled in
+// by parseNode while it walks the HTML subtree of the entry.
+type post struct {
+ Time time.Time // built with time.Unix from an <abbr data-utime="..."> attribute
+ Content string // text nodes of the entry, each followed by a newline
+}
+
+func main() {
+ var groups []string
+ groups = append(groups, "")
+
+ for i := range groups {
+ url := "https://www.facebook.com/pg/" + groups[i] + "/posts/"
+
+ resp, err := http.Get(url)
+ if err != nil {
+ panic(err)
+ }
+ defer resp.Body.Close()
+
+ ps, err := parse(resp.Body)
+ if err != nil {
+ panic(err)
+ }
+
+ fmt.Println(url)
+ fmt.Printf("%s", present(ps))
+ }
+}
+
+func present(ps []post) (s string) {
+ for i := range ps {
+ s += ps[i].Time.String() + "\n"
+ s += ps[i].Content + "\n\n"
+ }
+
+ return
+}
diff --git a/parser.go b/parser.go
new file mode 100644
index 0000000..119c689
--- /dev/null
+++ b/parser.go
@@ -0,0 +1,67 @@
+package main
+
+import (
+ "io"
+ "strconv"
+ "strings"
+ "time"
+
+ "golang.org/x/net/html"
+)
+
+// parse reads a complete HTML document from r (for instance an
+// http.Response.Body) and returns one post for every <div> whose class
+// attribute contains "userContentWrapper", in document order.
+// A non-nil error is the one reported by html.Parse.
+func parse(r io.Reader) (ps []post, err error) {
+	root, err := html.Parse(r)
+	if err != nil {
+		return nil, err
+	}
+
+	// isWrapper reports whether n is a <div> carrying the wrapper class.
+	// (The original source keeps "text_exposed_root" as a commented-out
+	// alternative selector.)
+	isWrapper := func(n *html.Node) bool {
+		if n.Type != html.ElementNode || n.Data != "div" {
+			return false
+		}
+		for _, attr := range n.Attr {
+			if attr.Key == "class" && strings.Contains(attr.Val, "userContentWrapper") {
+				return true
+			}
+		}
+		return false
+	}
+
+	// Pre-order walk over the whole tree. The walk also descends below a
+	// matching div, so a wrapper nested inside another yields its own post.
+	var walk func(*html.Node)
+	walk = func(node *html.Node) {
+		if isWrapper(node) {
+			var p post
+			parseNode(node, &p)
+			ps = append(ps, p)
+		}
+		for child := node.FirstChild; child != nil; child = child.NextSibling {
+			walk(child)
+		}
+	}
+	walk(root)
+
+	return ps, nil
+}
+
+// parseNode walks the subtree rooted at n in document order and collects
+// post data into p:
+//
+//   - An <abbr> element with a data-utime attribute supplies p.Time, read as
+//     seconds since the Unix epoch. The children of such an element are not
+//     visited (presumably they only hold the human-readable time label).
+//   - Every text node other than the literal "..." is appended to p.Content,
+//     followed by a newline.
+func parseNode(n *html.Node, p *post) {
+	switch n.Type {
+	case html.ElementNode:
+		if n.Data == "abbr" {
+			for _, attr := range n.Attr {
+				if attr.Key != "data-utime" {
+					continue
+				}
+				// Only accept a well-formed timestamp. A malformed value
+				// leaves p.Time untouched instead of silently stamping the
+				// post with the Unix epoch (1970-01-01).
+				if unix, err := strconv.ParseInt(attr.Val, 10, 64); err == nil {
+					p.Time = time.Unix(unix, 0)
+				}
+				return
+			}
+		}
+	case html.TextNode:
+		if n.Data != "..." {
+			p.Content += n.Data + "\n"
+		}
+	}
+
+	// Descend into the children.
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		parseNode(c, p)
+	}
+}