aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPetter Rodhelind <petter.rodhelind@gmail.com>2021-02-21 17:40:05 +0100
committerPetter Rodhelind <petter.rodhelind@gmail.com>2021-02-21 17:40:05 +0100
commite618c5b876155c71cf4f8aebe373fc9bc29716f9 (patch)
tree6531fb7c0fa367c6a1125e1a421f5ee90b577ee8
parentdc79f4c18a76f5f907b3061e2e08ae4923da40e2 (diff)
downloadfbfeed-e618c5b876155c71cf4f8aebe373fc9bc29716f9.tar.gz
fbfeed-e618c5b876155c71cf4f8aebe373fc9bc29716f9.tar.bz2
fbfeed-e618c5b876155c71cf4f8aebe373fc9bc29716f9.zip
RSS and HTML presentation of the feed.
-rw-r--r--.gitignore2
-rw-r--r--README.md14
-rw-r--r--main.go110
-rw-r--r--parser.go46
-rw-r--r--templates.go78
5 files changed, 224 insertions, 26 deletions
diff --git a/.gitignore b/.gitignore
index 8c047f4..f953b83 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,2 @@
.DS_Store
-fbrss
+fbfeed
diff --git a/README.md b/README.md
index 97d30ce..17fd76b 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,18 @@
-# fbrss
+# fbfeed
Web scraper that presents a public Facebook group's posts as an RSS
-feed.
+feed or HTML page.
This is particularly good for those of you who do not use Facebook,
but still end up in social networks and local communities where
gatherings and events are all organized using Facebook.
+
+## Usage
+
+Compile and run. Host it somewhere or locally.
+
+For HTML: `/nameOfGroup`
+
+For RSS: `/nameOfGroup.rss`
+
+`nameOfGroup` is the @-name in Facebook.
diff --git a/main.go b/main.go
index 51f17e6..0e7dbba 100644
--- a/main.go
+++ b/main.go
@@ -3,42 +3,112 @@ package main
import (
"fmt"
"net/http"
+ "strings"
"time"
)
+var outputMode string
+
+type channel struct {
+ Title string
+ Link string
+ Description string
+ Items []*post
+}
+
+func (c *channel) String() string {
+ var template string
+ switch outputMode {
+ case "html":
+ template = htmlRoot
+ case "rss":
+ template = rssRoot
+ }
+
+ var s string
+ s = strings.Replace(template, "{{title}}", c.Title, -1)
+ s = strings.Replace(s, "{{link}}", c.Link, -1)
+ s = strings.Replace(s, "{{description}}", c.Description, -1)
+ var items string
+ for i := range c.Items {
+ item := c.Items[i].String()
+ item = strings.Replace(item, "{{link}}", c.Link, -1)
+ items += item
+ }
+ s = strings.Replace(s, "{{items}}", items, 1)
+ return s
+}
+
type post struct {
Time time.Time
+ Link string
Content string
}
-func main() {
- var groups []string
- groups = append(groups, "")
+func (p *post) String() string {
+ var template string
+ switch outputMode {
+ case "html":
+ template = htmlItem
+ case "rss":
+ template = rssItem
+ }
+
+ var s string
+ // time format: Mon Jan 2 15:04:05 -0700 MST 2006
+ s = strings.Replace(template, "{{time}}", p.Time.Format("Mon, 2 Jan 2006 15:04:05 MST"), 2)
+ s = strings.Replace(s, "{{content}}", p.Content, 1)
+ return s
+}
- for i := range groups {
- url := "https://www.facebook.com/pg/" + groups[i] + "/posts/"
+type handler struct{}
- resp, err := http.Get(url)
- if err != nil {
- panic(err)
- }
- defer resp.Body.Close()
+func (h handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ group := r.URL.Path[1:]
+ if strings.HasSuffix(group, ".rss") {
+ group = strings.TrimSuffix(group, ".rss")
+ outputMode = "rss"
+ }
- ps, err := parse(resp.Body)
- if err != nil {
- panic(err)
- }
+ c, err := fetch(group)
+ if err != nil {
+ http.Error(w, fmt.Sprintf("error: %s", err), 400)
+ return
+ }
- fmt.Println(url)
- fmt.Printf("%s", present(ps))
+ if c == nil || len(c.Items) < 1 {
+ http.Error(w, fmt.Sprintf("%s", "group not found"), 400)
+ return
}
+
+ fmt.Fprintf(w, "%s\n", c.String())
+}
+
+func main() {
+ outputMode = "html"
+ fmt.Println("Serving: http://localhost:1212")
+ http.ListenAndServe(":1212", handler{})
}
-func present(ps []post) (s string) {
- for i := range ps {
- s += ps[i].Time.String() + "\n"
- s += ps[i].Content + "\n\n"
+func fetch(group string) (c *channel, err error) {
+ if group == "" {
+ return
}
+ url := "https://www.facebook.com/pg/" + group + "/posts/"
+
+ resp, err := http.Get(url)
+ if err != nil {
+ return
+ }
+ defer resp.Body.Close()
+
+ c, err = parse(resp.Body)
+ if err != nil {
+ return
+ }
+
+ c.Link = url
+
return
}
diff --git a/parser.go b/parser.go
index 119c689..8bae5a9 100644
--- a/parser.go
+++ b/parser.go
@@ -11,7 +11,8 @@ import (
// parse takes an io.Reader which is supposed to be an entire web page (like http.Response.Body)
// and returns the separate posts found inside.
-func parse(r io.Reader) (ps []post, err error) {
+func parse(r io.Reader) (c *channel, err error) {
+ c = &channel{}
doc, err := html.Parse(r)
if err != nil {
return
@@ -19,13 +20,31 @@ func parse(r io.Reader) (ps []post, err error) {
var f func(*html.Node)
f = func(n *html.Node) {
+ if n.Type == html.ElementNode && n.Data == "meta" {
+ var prop string
+ for _, a := range n.Attr {
+ if a.Key == "property" {
+ prop = a.Val
+ }
+ if a.Key == "content" {
+ switch prop {
+ case "og:title":
+ c.Title = a.Val
+ case "og:description":
+ c.Description = a.Val
+ }
+ }
+ }
+
+ }
+
if n.Type == html.ElementNode && n.Data == "div" {
for _, a := range n.Attr {
//if a.Key == "class" && strings.Contains(a.Val, "text_exposed_root") {
if a.Key == "class" && strings.Contains(a.Val, "userContentWrapper") {
var p post
parseNode(n, &p)
- ps = append(ps, p)
+ c.Items = append(c.Items, &p)
break
}
}
@@ -43,6 +62,7 @@ func parse(r io.Reader) (ps []post, err error) {
func parseNode(n *html.Node, p *post) {
if n.Type == html.ElementNode {
+ // fetch time stamp from header
if n.Data == "abbr" {
for _, attr := range n.Attr {
if attr.Key == "data-utime" {
@@ -52,11 +72,31 @@ func parseNode(n *html.Node, p *post) {
}
}
}
+ // skip post header with name of group and check-in
+ if n.Data == "h5" {
+ return
+ }
+ // skip "show more" and aria-hidden stuff
+ if n.Data == "span" {
+ for _, attr := range n.Attr {
+ if attr.Key == "class" && attr.Val == "text_exposed_hide" {
+ return
+ }
+ if attr.Key == "aria-hidden" && attr.Val == "true" {
+ return
+ }
+ }
+ }
}
+ // fetch all pure text elements
if n.Type == html.TextNode {
+ nl := "\n" // newlines
+ if n.Parent.Type == html.ElementNode && n.Parent.Data == "p" {
+ nl = "\n\n"
+ }
if n.Data != "..." {
- p.Content += n.Data + "\n"
+ p.Content += strings.Trim(n.Data, " ") + nl
}
}
diff --git a/templates.go b/templates.go
new file mode 100644
index 0000000..8f6453f
--- /dev/null
+++ b/templates.go
@@ -0,0 +1,78 @@
+package main
+
+const rssRoot = `
+<?xml version="1.0" encoding="utf-8" standalone="yes"?>
+<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
+ <channel>
+ <title>{{title}}</title>
+ <link>{{link}}</link>
+ <description>{{description}}</description>
+ <generator>fbfeed</generator>
+ {{items}}
+ </channel>
+</rss>
+`
+
+const rssItem = `
+<item>
+ <title>{{time}}</title>
+ <pubDate>{{time}}</pubDate>
+ <link>{{link}}</link>
+ <description>{{content}}</description>
+</item>
+`
+
+const htmlRoot = `
+<!DOCTYPE html>
+<html>
+<head>
+	<title>{{title}} &middot; fbfeed</title>
+ <style type="text/css">
+ body {
+ font-size: 14pt;
+ line-height: 1.2;
+ max-width: 1200px;
+ margin: 0 auto;
+ padding: 0.5em;
+ word-wrap: break-word;
+ }
+ article {
+ margin-bottom: 1em;
+ }
+ article datetime {
+ font-weight: bold;
+ }
+ article pre {
+ margin: 0;
+ padding-left: 1em;
+ font-family: inherit;
+ word-wrap: break-word;
+ white-space: pre-line;
+ }
+ </style>
+</head>
+<body>
+ <header>
+ <h1>{{title}}</h1>
+ <a href="{{link}}">{{link}}</a>
+ <p>{{description}}</p>
+ </header>
+ <hr>
+ <main>
+ {{items}}
+ </main>
+ <hr>
+ <footer>
+ <small>Generated by fbfeed</small>
+ </footer>
+</body>
+
+</html>
+`
+
+const htmlItem = `
+<article>
+<datetime>{{time}}</datetime>
+<pre>{{content}}</pre>
+</article>
+`