package main

import (
	"io"
	"net/url"
	"strconv"
	"strings"
	"time"

	"golang.org/x/net/html"
)

// parse reads an entire HTML page from r (for example an http.Response.Body)
// and returns the channel metadata together with the posts found inside.
//
// The channel Title and Description are taken from the "og:title" and
// "og:description" <meta> tags. Every <div> whose class attribute contains
// "userContentWrapper" is handed to parseNode and appended to c.Items as one
// post. The walk keeps descending into such a div afterwards, so wrappers
// nested inside another wrapper yield posts of their own as well.
//
// If the document cannot be parsed, the error from html.Parse is returned
// along with an empty, non-nil channel.
func parse(r io.Reader) (c *channel, err error) {
	c = &channel{}
	doc, err := html.Parse(r)
	if err != nil {
		return c, err
	}

	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode {
			switch n.Data {
			case "meta":
				// Collect both attributes before acting on them: HTML does
				// not guarantee that "property" precedes "content", so
				// deciding while still iterating would miss tags written
				// the other way round.
				var (
					prop, content string
					hasContent    bool
				)
				for _, a := range n.Attr {
					switch a.Key {
					case "property":
						prop = a.Val
					case "content":
						content = a.Val
						hasContent = true
					}
				}
				if hasContent {
					switch prop {
					case "og:title":
						c.Title = content
					case "og:description":
						c.Description = content
					}
				}

			case "div":
				for _, a := range n.Attr {
					if a.Key == "class" && strings.Contains(a.Val, "userContentWrapper") {
						var p post
						parseNode(n, &p)
						c.Items = append(c.Items, &p)
						break
					}
				}
			}
		}

		// loop further down
		for child := n.FirstChild; child != nil; child = child.NextSibling {
			walk(child)
		}
	}
	walk(doc)

	return c, nil
}

// parseNode walks the subtree rooted at n and accumulates what it finds in p.
//
// Elements are handled as follows; anything not listed is simply descended
// into:
//
//	abbr  a "data-utime" attribute is read as Unix seconds into p.Time and
//	      the element is not descended into
//	h5    skipped together with its whole subtree
//	span, a, div
//	      skipped with their subtree when the class is exactly
//	      "text_exposed_hide", the class contains "accessible_elem", or
//	      aria-hidden is "true"
//	p     delegated to parseParagraph, followed by a blank line
//	img   delegated to parseImage
//
// Text nodes outside of <p> are appended to p.Content followed by a newline;
// when the text sits directly inside an <a>, the link target is appended via
// parseLink before the newline.
func parseNode(n *html.Node, p *post) {
	if n.Type == html.ElementNode {
		switch n.Data {
		// fetch time stamp from header
		case "abbr":
			for _, attr := range n.Attr {
				if attr.Key != "data-utime" {
					continue
				}
				// Only accept a well-formed timestamp. Ignoring the parse
				// error would stamp the post with the Unix epoch, which is
				// indistinguishable from a real (if implausible) date.
				if unix, err := strconv.ParseInt(attr.Val, 10, 64); err == nil {
					p.Time = time.Unix(unix, 0)
				}
				return
			}

		// skip post header with name of group and check-in
		case "h5":
			return

		// skip "show more", aria-hidden and accessible stuff
		case "span", "a", "div":
			for _, attr := range n.Attr {
				switch attr.Key {
				case "class":
					if attr.Val == "text_exposed_hide" || strings.Contains(attr.Val, "accessible_elem") {
						return
					}
				case "aria-hidden":
					if attr.Val == "true" {
						return
					}
				}
			}

		// parse paragraphs
		case "p":
			parseParagraph(n, p)
			p.Content += "\n\n"
			return

		// get images
		case "img":
			parseImage(n, p)
			return
		}
	}

	// fetch all pure text elements outside proper post paragraphs
	if n.Type == html.TextNode {
		p.Content += n.Data
		// A text node passed in as the root has no parent; guard against
		// that instead of dereferencing nil.
		if parent := n.Parent; parent != nil && parent.Type == html.ElementNode && parent.Data == "a" {
			parseLink(parent, p)
		}
		p.Content += "\n"
	}

	// loop deeper
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		parseNode(child, p)
	}
}

// parseParagraph appends the textual content of the subtree rooted at n to
// p.Content. Text nodes contribute their data verbatim; text that sits
// directly inside an <a> is additionally followed by the link target via
// parseLink. A <br> element contributes a single newline. All other nodes are
// only descended into.
func parseParagraph(n *html.Node, p *post) {
	switch {
	case n.Type == html.TextNode:
		// plain text, optionally annotated with the target of its link
		p.Content += n.Data
		if parent := n.Parent; parent.Type == html.ElementNode && parent.Data == "a" {
			parseLink(parent, p)
			return
		}

	case n.Type == html.ElementNode && n.Data == "br":
		// explicit line break inside the paragraph
		p.Content += "\n"
		return
	}

	// recurse into every child in document order
	child := n.FirstChild
	for child != nil {
		parseParagraph(child, p)
		child = child.NextSibling
	}
}

// parseImage builds an image from the attributes of n and appends it to
// p.Images. "src" becomes the image Source; "alt", "title" and "aria-label"
// all write the Caption, so whichever of them appears last on the element
// wins. An image is appended even when none of these attributes is present.
func parseImage(n *html.Node, p *post) {
	picture := &image{}
	for i := range n.Attr {
		key, val := n.Attr[i].Key, n.Attr[i].Val
		switch key {
		case "src":
			picture.Source = val
		case "alt", "title", "aria-label":
			picture.Caption = val
		}
	}
	p.Images = append(p.Images, picture)
}

// parseLink appends the target of the element n to p.Content in the form
// " [target]", once for every "href" attribute found on n.
//
// If the href carries a non-empty "u" query parameter, that value is used as
// the target (presumably the destination of an outbound redirect link —
// confirm against real markup). Otherwise the target is the path-unescaped
// href with its query string removed; a target starting with "/" is made
// absolute by prefixing "https://www.facebook.com".
//
// Hrefs that cannot be parsed or unescaped are handled on a best-effort basis
// and never cause a panic; an href with nothing in front of its query string
// (including an empty href) is skipped.
func parseLink(n *html.Node, p *post) {
	for _, attr := range n.Attr {
		if attr.Key != "href" {
			continue
		}
		href := attr.Val

		// Parse the href exactly as written: Query() performs the decoding
		// itself. Unescaping the whole href beforehand would decode the "u"
		// value twice and cut it off at its first encoded '&'.
		if u, err := url.Parse(href); err == nil {
			if external := u.Query().Get("u"); external != "" {
				p.Content += " [" + external + "]"
				continue
			}
		}

		// No usable "u" parameter: fall back to the href itself without its
		// query string. If it contains invalid escapes, keep the raw form.
		base, err := url.PathUnescape(href)
		if err != nil {
			base = href
		}
		if i := strings.Index(base, "?"); i >= 0 {
			base = base[:i]
		}
		if base == "" {
			// nothing meaningful to show (empty or query-only href)
			continue
		}
		if base[0] == '/' {
			base = "https://www.facebook.com" + base
		}
		p.Content += " [" + base + "]"
	}
}