ncore-stats/scraper.go

package main

import (
	"context"
	"fmt"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/sirupsen/logrus"
)

// worker runs one scrape cycle immediately, then repeats every 24 hours
// until the context is cancelled.
func (s *State) worker(ctx context.Context) {
	ticker := time.NewTicker(24 * time.Hour)
	defer ticker.Stop()

	s.scrapeAll(ctx)
	for {
		select {
		case <-ticker.C:
			s.scrapeAll(ctx)
		case <-ctx.Done():
			return
		}
	}
}

// scrapeAll loads every tracked user from the database and scrapes their
// profiles concurrently, at most three at a time, recording the results in
// profile_history.
func (s *State) scrapeAll(ctx context.Context) {
	rows, err := s.db.Query("SELECT id, display_name, profile_id FROM users")
	if err != nil {
		logrus.Errorf("User query failed: %v", err)
		return
	}
	defer rows.Close()

	var users []User
	for rows.Next() {
		var u User
		if err := rows.Scan(&u.ID, &u.DisplayName, &u.ProfileID); err != nil {
			logrus.Errorf("User scan failed: %v", err)
			continue
		}
		users = append(users, u)
	}
	if err := rows.Err(); err != nil {
		logrus.Errorf("User iteration failed: %v", err)
	}
	if len(users) == 0 {
		return
	}

	logrus.Infof("Starting concurrent scrape for %d users", len(users))

	// Limit concurrency to three in-flight scrapes at a time.
	sem := make(chan struct{}, 3)
	var wg sync.WaitGroup
	for _, u := range users {
		select {
		case <-ctx.Done():
			logrus.Info("Scrape cycle cancelled by context")
			// Wait for in-flight goroutines; their requests abort quickly
			// because they share the cancelled context.
			wg.Wait()
			return
		case sem <- struct{}{}:
			wg.Add(1)
			go func(user User) {
				defer wg.Done()
				defer func() { <-sem }()

				// Stagger requests with a small per-user jitter so the
				// tracker is not hit by all workers at once.
				time.Sleep(time.Duration(200+(user.ID%1000)) * time.Millisecond)

				profile, err := s.fetchProfile(ctx, user)
				if err != nil {
					logrus.Errorf("[%s] Fetch failed: %v", user.DisplayName, err)
					return
				}
				_, err = s.db.Exec(`INSERT INTO profile_history(user_id, timestamp, rank, upload, current_upload, current_download, points, seeding_count) VALUES(?, ?, ?, ?, ?, ?, ?, ?)`,
					user.ID, profile.Timestamp, profile.Rank, profile.Upload, profile.CurrentUpload, profile.CurrentDownload, profile.Points, profile.SeedingCount)
				if err != nil {
					logrus.Errorf("[%s] DB log failed: %v", user.DisplayName, err)
				} else {
					logrus.Infof("[%s] Metrics recorded", user.DisplayName)
				}
			}(u)
		}
	}
	wg.Wait()
	logrus.Info("Scrape cycle complete")
}
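
// Sketch (assumption, not part of the repository): a profile_history schema
// that would match the INSERT in scrapeAll above. The project's real
// migration may differ in types and constraints; this is shown only to make
// the recorded columns concrete.
const profileHistorySchemaSketch = `
CREATE TABLE IF NOT EXISTS profile_history (
    user_id          INTEGER NOT NULL REFERENCES users(id),
    timestamp        DATETIME NOT NULL,
    rank             INTEGER,
    upload           TEXT,
    current_upload   TEXT,
    current_download TEXT,
    points           INTEGER,
    seeding_count    INTEGER
);`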

// fetchProfile downloads one user's nCore profile page using the configured
// session cookie and scrapes rank, total upload, points, seeding count and
// the current transfer speeds out of the HTML.
func (s *State) fetchProfile(ctx context.Context, user User) (*ProfileData, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, ncoreBaseURL+user.ProfileID, nil)
	if err != nil {
		return nil, fmt.Errorf("create request: %w", err)
	}
	req.Header.Set("Cookie", fmt.Sprintf("nick=%s; pass=%s", s.config.Ncore.Nick, s.config.Ncore.Pass))

	resp, err := s.client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("do request: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status %d", resp.StatusCode)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("parse document: %w", err)
	}

	p := &ProfileData{Owner: user.DisplayName, Timestamp: time.Now()}

	// The profile box holds label/value cell pairs; match the Hungarian
	// labels and read the value from the sibling cell.
	doc.Find(".userbox_tartalom_mini .profil_jobb_elso2").Each(func(i int, sel *goquery.Selection) {
		label := strings.ToLower(sel.Text())
		value := strings.TrimSpace(sel.Next().Text())
		if strings.Contains(label, "helyezés") { // rank, e.g. "123."
			p.Rank, _ = strconv.Atoi(strings.TrimSuffix(value, "."))
		} else if strings.Contains(label, "feltöltés") { // total upload
			p.Upload = value
		} else if strings.Contains(label, "pontok") { // points, with space-grouped digits
			p.Points, _ = strconv.Atoi(strings.ReplaceAll(value, " ", ""))
		}
	})

	// The torrent list headers carry the seeding count (in parentheses) and
	// the current up/down speeds after the "fel:" / "le:" prefixes.
	doc.Find(".lista_mini_fej").Each(func(i int, sel *goquery.Selection) {
		text := sel.Text()
		if strings.Contains(text, "seeding") || strings.Contains(text, "futó") {
			if m := regexp.MustCompile(`\((\d+)\)`).FindStringSubmatch(text); len(m) > 1 {
				p.SeedingCount, _ = strconv.Atoi(m[1])
			}
		}
		if m := regexp.MustCompile(`fel: ([\d.]+ \w+/s)`).FindStringSubmatch(text); len(m) > 1 {
			p.CurrentUpload = m[1]
		}
		if m := regexp.MustCompile(`le: ([\d.]+ \w+/s)`).FindStringSubmatch(text); len(m) > 1 {
			p.CurrentDownload = m[1]
		}
	})
	return p, nil
}
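
The listing above is only scraper.go; the State, Config, User, and ProfileData types and the ncoreBaseURL constant live elsewhere in the package and are not shown. The sketch below reconstructs them from how scraper.go uses them, purely as an illustration: the field names and the base URL are assumptions and may not match the project's actual code.

// Assumed sketch of the rest of the package, reconstructed for illustration only.
package main

import (
	"database/sql"
	"net/http"
	"time"
)

// Assumed profile URL prefix; fetchProfile appends user.ProfileID to it.
const ncoreBaseURL = "https://ncore.pro/profile.php?id="

// State holds the long-lived dependencies shared by the scraper.
type State struct {
	db     *sql.DB      // users table and profile_history storage
	client *http.Client // HTTP client used by fetchProfile
	config Config       // carries the nCore session cookie values
}

// Config mirrors the s.config.Ncore.Nick / s.config.Ncore.Pass accesses.
type Config struct {
	Ncore struct {
		Nick string
		Pass string
	}
}

// User matches the columns scanned in scrapeAll.
type User struct {
	ID          int
	DisplayName string
	ProfileID   string
}

// ProfileData matches the fields written by fetchProfile and inserted into profile_history.
type ProfileData struct {
	Owner           string
	Timestamp       time.Time
	Rank            int
	Upload          string
	CurrentUpload   string
	CurrentDownload string
	Points          int
	SeedingCount    int
}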