diff options
-rw-r--r-- | main.go | 44 | ||||
-rw-r--r-- | parser.go | 67 |
2 files changed, 111 insertions, 0 deletions
@@ -0,0 +1,44 @@ +package main + +import ( + "fmt" + "net/http" + "time" +) + +type post struct { + Time time.Time + Content string +} + +func main() { + var groups []string + groups = append(groups, "") + + for i := range groups { + url := "https://www.facebook.com/pg/" + groups[i] + "/posts/" + + resp, err := http.Get(url) + if err != nil { + panic(err) + } + defer resp.Body.Close() + + ps, err := parse(resp.Body) + if err != nil { + panic(err) + } + + fmt.Println(url) + fmt.Printf("%s", present(ps)) + } +} + +func present(ps []post) (s string) { + for i := range ps { + s += ps[i].Time.String() + "\n" + s += ps[i].Content + "\n\n" + } + + return +} diff --git a/parser.go b/parser.go new file mode 100644 index 0000000..119c689 --- /dev/null +++ b/parser.go @@ -0,0 +1,67 @@ +package main + +import ( + "io" + "strconv" + "strings" + "time" + + "golang.org/x/net/html" +) + +// parse takes an io.Reader which is supposed to be an entire web page (like http.Response.Body) +// and returns the separate posts found inside. +func parse(r io.Reader) (ps []post, err error) { + doc, err := html.Parse(r) + if err != nil { + return + } + + var f func(*html.Node) + f = func(n *html.Node) { + if n.Type == html.ElementNode && n.Data == "div" { + for _, a := range n.Attr { + //if a.Key == "class" && strings.Contains(a.Val, "text_exposed_root") { + if a.Key == "class" && strings.Contains(a.Val, "userContentWrapper") { + var p post + parseNode(n, &p) + ps = append(ps, p) + break + } + } + } + + // loop further down + for c := n.FirstChild; c != nil; c = c.NextSibling { + f(c) + } + } + f(doc) + + return +} + +func parseNode(n *html.Node, p *post) { + if n.Type == html.ElementNode { + if n.Data == "abbr" { + for _, attr := range n.Attr { + if attr.Key == "data-utime" { + unix, _ := strconv.ParseInt(attr.Val, 10, 64) + p.Time = time.Unix(unix, 0) + return + } + } + } + } + + if n.Type == html.TextNode { + if n.Data != "..." { + p.Content += n.Data + "\n" + } + } + + // loop deeper + for c := n.FirstChild; c != nil; c = c.NextSibling { + parseNode(c, p) + } +} |