about | summary | refs | log | tree | commit | diff
path: root/parser.go
diff options
context:
space:
mode:
Diffstat (limited to 'parser.go')
-rw-r--r-- parser.go 39
1 files changed, 31 insertions, 8 deletions
diff --git a/parser.go b/parser.go
index 58ce9bf..4d3720b 100644
--- a/parser.go
+++ b/parser.go
@@ -40,7 +40,6 @@ func parse(r io.Reader) (c *channel, err error) {
if n.Type == html.ElementNode && n.Data == "div" {
for _, a := range n.Attr {
- //if a.Key == "class" && strings.Contains(a.Val, "text_exposed_root") {
if a.Key == "class" && strings.Contains(a.Val, "userContentWrapper") {
var p post
parseNode(n, &p)
@@ -62,8 +61,9 @@ func parse(r io.Reader) (c *channel, err error) {
func parseNode(n *html.Node, p *post) {
if n.Type == html.ElementNode {
+ switch n.Data {
// fetch time stamp from header
- if n.Data == "abbr" {
+ case "abbr":
for _, attr := range n.Attr {
if attr.Key == "data-utime" {
unix, _ := strconv.ParseInt(attr.Val, 10, 64)
@@ -71,13 +71,15 @@ func parseNode(n *html.Node, p *post) {
return
}
}
- }
+
// skip post header with name of group and check-in
- if n.Data == "h5" {
+ case "h5":
return
- }
+
// skip "show more" and aria-hidden stuff
- if n.Data == "span" {
+ case "span":
+ fallthrough
+ case "a":
for _, attr := range n.Attr {
if attr.Key == "class" && attr.Val == "text_exposed_hide" {
return
@@ -86,13 +88,17 @@ func parseNode(n *html.Node, p *post) {
return
}
}
- }
// parse paragraphs
- if n.Data == "p" {
+ case "p":
parseParagraph(n, p)
p.Content += "\n\n"
return
+
+ // get images
+ case "img":
+ parseImage(n, p)
+ return
}
}
@@ -118,3 +124,20 @@ func parseParagraph(n *html.Node, p *post) {
parseParagraph(c, p)
}
}
+
+// parseImage appends to p.Images an image whose Source is n's src and whose Caption is n's alt, title or aria-label.
+func parseImage(n *html.Node, p *post) {
+	img := &image{}
+	for _, attr := range n.Attr {
+		switch attr.Key {
+		case "src":
+			img.Source = attr.Val
+		case "alt", "title", "aria-label":
+			// The last non-empty value wins, so an empty attribute (e.g. alt="") cannot erase a caption.
+			if attr.Val != "" {
+				img.Caption = attr.Val
+			}
+		}
+	}
+	p.Images = append(p.Images, img)
+}