ncore-leaderboard/main.go

192 lines
3.9 KiB
Go

package main
import (
"encoding/csv"
"fmt"
"log"
"net/http"
"os"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/joho/godotenv"
)
// Scraper configuration. All values are untyped constants so they can be
// combined with both int and int32 operands elsewhere in the file.
const (
	// baseURL is the profile page address; the numeric profile id is
	// appended directly to it.
	baseURL = "https://ncore.pro/profile.php?id="

	// startProfile and endProfile bound the inclusive range of profile ids
	// that main iterates over.
	startProfile = 1
	endProfile   = 1812000

	// concurrency is the id interval at which main waits for all in-flight
	// fetches to finish before launching more.
	concurrency = 50

	// outputFile is the path of the CSV file the sorted results are written to.
	outputFile = "output.log"

	// writeBatch is the number of recorded profiles between intermediate
	// rewrites of outputFile.
	writeBatch = 100
)
// Request configuration, populated once by init.
var (
	// nick and pass are read from the NICK and PASS environment variables
	// and sent as cookie values with every profile request.
	nick string
	pass string

	// client is the single HTTP client shared by all fetch goroutines.
	client *http.Client
)

// State shared between the fetch goroutines and main.
var (
	// wg tracks the fetchProfile goroutines that are still running.
	wg sync.WaitGroup

	// mu guards lines.
	mu sync.Mutex

	// lines accumulates one entry per profile whose rank was parsed.
	lines []Line

	// processed counts recorded profiles; it is only accessed through
	// sync/atomic.
	processed int32
)
// Line is one row of the output file: a profile URL together with the
// integer used as the sort key.
type Line struct {
	// URL is the full address of the profile page.
	URL string
	// SecondCol is the value written to the second CSV column; the scraper
	// stores the parsed rank here.
	SecondCol int
}
// init loads optional .env files, reads the NICK and PASS credentials from
// the environment, and builds the shared HTTP client.
func init() {
	// Loading is best-effort: errors are deliberately ignored, presumably
	// because either file may be absent and the variables can come from the
	// real environment instead.
	_ = godotenv.Load(".env.local")
	_ = godotenv.Load()

	nick = os.Getenv("NICK")
	pass = os.Getenv("PASS")
	if nick == "" || pass == "" {
		// Not fatal, but the cookie sent with each request will carry empty values.
		log.Println("warning: NICK and/or PASS is empty; requests will carry an incomplete cookie")
	}

	// Without a timeout a single stalled connection would block its goroutine
	// forever, and with it every wg.Wait() in main.
	client = &http.Client{Timeout: 30 * time.Second}
}
// fetchProfile downloads the profile page for id, extracts the value that
// follows the "Helyezés:" label, and records it in lines as the profile's rank.
//
// It is meant to run as a goroutine: it calls wg.Done on return, reports
// request and parse failures through the logger, and silently ignores
// responses whose status is not 200 OK. Every writeBatch recorded profiles it
// rewrites the output file.
func fetchProfile(id int) {
	defer wg.Done()

	url := baseURL + strconv.Itoa(id)
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		log.Printf("Error creating request for %d: %v\n", id, err)
		return
	}
	req.Header.Set("Cookie", fmt.Sprintf("nick=%s; pass=%s", nick, pass))

	resp, err := client.Do(req)
	if err != nil {
		log.Printf("Error fetching profile %d: %v\n", id, err)
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		log.Printf("Error parsing profile document for %d: %v\n", id, err)
		return
	}

	doc.Find(".userbox_tartalom_mini").Each(func(_ int, box *goquery.Selection) {
		box.Find(".profil_jobb_elso2").Each(func(_ int, labelSel *goquery.Selection) {
			// Only the rank row is of interest; surrounding whitespace in the
			// markup must not prevent the match.
			if strings.TrimSpace(labelSel.Text()) != "Helyezés:" {
				return
			}
			valueSel := labelSel.Next()
			if valueSel.Length() == 0 {
				return
			}

			// The rank is rendered with a trailing dot, e.g. "123.".
			rank := strings.TrimSuffix(strings.TrimSpace(valueSel.Text()), ".")
			rankInt, err := strconv.Atoi(rank)
			if err != nil {
				log.Printf("Skipping profile %d due to invalid rank: %s\n", id, rank)
				return
			}

			mu.Lock()
			lines = append(lines, Line{URL: url, SecondCol: rankInt})
			mu.Unlock()

			// Use the value returned by the increment itself: re-reading the
			// counter afterwards lets a concurrent increment slip in between,
			// which can skip a batch boundary or trigger it twice.
			if count := atomic.AddInt32(&processed, 1); count%writeBatch == 0 {
				writeSortedOutput()
			}
			printProgress()
		})
	})
}
// printProgress rewrites the current terminal line with the number of
// profiles recorded so far.
func printProgress() {
	count := atomic.LoadInt32(&processed)
	// The leading carriage return makes each update overwrite the previous one.
	fmt.Printf("\rProcessed %d profiles...", count)
}
// quicksort sorts lines[low..high] (both bounds inclusive) in place by
// ascending SecondCol. A range with low >= high is left untouched. The sort
// is not stable.
//
// Only the smaller side of each partition is handled recursively; the larger
// side is processed by the loop. This keeps the stack depth logarithmic even
// when the partitions are badly unbalanced, instead of growing linearly with
// the slice length.
func quicksort(lines []Line, low, high int) {
	for low < high {
		p := partition(lines, low, high)
		if p-low < high-p {
			// Left side is smaller: recurse into it, continue with the right.
			quicksort(lines, low, p-1)
			low = p + 1
		} else {
			// Right side is smaller (or equal): recurse into it, continue with the left.
			quicksort(lines, p+1, high)
			high = p - 1
		}
	}
}
// partition rearranges lines[low..high] (both bounds inclusive, low <= high)
// around a pivot and returns the pivot's final index p. Afterwards every
// element in lines[low..p-1] has a SecondCol smaller than the pivot's and
// every element in lines[p+1..high] has a SecondCol greater than or equal
// to it.
//
// The pivot is the median of the first, middle and last element. Always
// taking the last element makes already-sorted input the worst case, with
// every split maximally unbalanced.
func partition(lines []Line, low, high int) int {
	mid := low + (high-low)/2

	// Move the smallest of the three candidates to low ...
	if lines[mid].SecondCol < lines[low].SecondCol {
		lines[mid], lines[low] = lines[low], lines[mid]
	}
	if lines[high].SecondCol < lines[low].SecondCol {
		lines[high], lines[low] = lines[low], lines[high]
	}
	// ... then the smaller of the remaining two (the median) to high, where
	// the scan below expects the pivot.
	if lines[mid].SecondCol < lines[high].SecondCol {
		lines[mid], lines[high] = lines[high], lines[mid]
	}

	pivot := lines[high].SecondCol
	boundary := low // first index not yet known to hold a value < pivot
	for j := low; j < high; j++ {
		if lines[j].SecondCol < pivot {
			lines[boundary], lines[j] = lines[j], lines[boundary]
			boundary++
		}
	}
	lines[boundary], lines[high] = lines[high], lines[boundary]
	return boundary
}
// sortLinesQuick sorts the package-level lines slice in place by ascending
// SecondCol. It does no locking of its own, so the caller must hold whatever
// lock protects lines.
func sortLinesQuick() {
	n := len(lines)
	if n < 2 {
		// Zero or one element is already in order.
		return
	}
	quicksort(lines, 0, n-1)
}
// writeSortedOutput sorts the collected lines by rank and rewrites outputFile
// from scratch as CSV, one "url,rank" record per line.
//
// It holds mu for its whole duration, so it is safe to call from several
// goroutines. Failure to create the file terminates the program; write, flush
// and close failures are logged.
func writeSortedOutput() {
	mu.Lock()
	defer mu.Unlock()

	sortLinesQuick()

	file, err := os.Create(outputFile)
	if err != nil {
		log.Fatalf("Error creating output file: %v\n", err)
	}
	defer func() {
		// Close can be the first place a failed write is reported.
		if err := file.Close(); err != nil {
			log.Printf("Error closing output file: %v\n", err)
		}
	}()

	writer := csv.NewWriter(file)
	for _, line := range lines {
		record := []string{line.URL, strconv.Itoa(line.SecondCol)}
		if err := writer.Write(record); err != nil {
			// A write error is sticky, so every following record would fail
			// the same way; report it once and stop.
			log.Printf("Error writing line to output file: %v\n", err)
			return
		}
	}

	// The writer buffers internally: errors from the final write only become
	// visible through Error after Flush.
	writer.Flush()
	if err := writer.Error(); err != nil {
		log.Printf("Error flushing output file: %v\n", err)
	}
}
// main asks for confirmation before replacing an existing output file, then
// fetches every profile id from startProfile through endProfile in waves of
// goroutines, writes the final sorted output and prints the elapsed time.
func main() {
	_, statErr := os.Stat(outputFile)
	if statErr == nil {
		// The output file is already there: only an explicit "yes" continues.
		fmt.Printf("Output file %s already exists. Overwrite? (yes/no): ", outputFile)
		var answer string
		fmt.Scanln(&answer)
		if answer != "yes" {
			log.Println("Exiting. Please rename or remove the existing output file.")
			return
		}
		if rmErr := os.Remove(outputFile); rmErr != nil {
			log.Fatalf("Failed to remove existing output file: %v\n", rmErr)
		}
	}

	fmt.Println("Scraping in progress...")
	began := time.Now()

	for id := startProfile; id <= endProfile; id++ {
		wg.Add(1)
		go fetchProfile(id)
		if id%concurrency != 0 {
			continue
		}
		// Every concurrency-th id acts as a barrier: wait for all fetches
		// started so far before launching the next wave.
		wg.Wait()
	}

	// Collect the last, possibly partial, wave before the final write.
	wg.Wait()
	writeSortedOutput()

	fmt.Printf("\nScraping and sorting completed in %s\n", time.Since(began))
}