// Mirror of https://github.com/skidoodle/ncore-leaderboard.git, synced 2025-02-15 05:29:15 +01:00 (192 lines, 3.9 KiB, Go).
package main
|
|
|
|
import (
	"encoding/csv"
	"fmt"
	"log"
	"net/http"
	"os"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/joho/godotenv"
)
|
|
|
|
// Scraper configuration.
const (
	// baseURL is the profile page address; the numeric profile id is
	// appended directly to it.
	baseURL = "https://ncore.pro/profile.php?id="

	// startProfile and endProfile are the inclusive bounds of the id range
	// that main iterates over.
	startProfile = 1
	endProfile   = 1812000

	// concurrency bounds how many profile fetches main keeps in flight.
	concurrency = 50

	// outputFile is the path of the CSV file produced by writeSortedOutput.
	outputFile = "output.log"

	// writeBatch is the number of recorded profiles between two intermediate
	// rewrites of outputFile.
	writeBatch = 100
)
|
|
|
|
var (
|
|
nick string
|
|
pass string
|
|
client *http.Client
|
|
wg sync.WaitGroup
|
|
mu sync.Mutex
|
|
lines []Line
|
|
processed int32
|
|
)
|
|
|
|
// Line is one output record: a profile URL together with the integer that is
// written as the second CSV column (the rank parsed from that profile).
type Line struct {
	// URL is the address of the profile page the record came from.
	URL string
	// SecondCol is the sort key and the second column of the output row.
	SecondCol int
}
|
|
|
|
func init() {
|
|
_ = godotenv.Load(".env.local")
|
|
godotenv.Load()
|
|
nick = os.Getenv("NICK")
|
|
pass = os.Getenv("PASS")
|
|
client = &http.Client{}
|
|
}
|
|
|
|
func fetchProfile(id int) {
|
|
defer wg.Done()
|
|
url := fmt.Sprintf("%s%d", baseURL, id)
|
|
|
|
req, err := http.NewRequest("GET", url, nil)
|
|
if err != nil {
|
|
log.Printf("Error creating request for %d: %v\n", id, err)
|
|
return
|
|
}
|
|
req.Header.Set("Cookie", fmt.Sprintf("nick=%s; pass=%s", nick, pass))
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
log.Printf("Error fetching profile %d: %v\n", id, err)
|
|
return
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return
|
|
}
|
|
|
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|
if err != nil {
|
|
log.Printf("Error parsing profile document for %d: %v\n", id, err)
|
|
return
|
|
}
|
|
|
|
doc.Find(".userbox_tartalom_mini").Each(func(i int, s *goquery.Selection) {
|
|
s.Find(".profil_jobb_elso2").Each(func(ii int, labelSel *goquery.Selection) {
|
|
label := labelSel.Text()
|
|
valueSel := labelSel.Next()
|
|
if valueSel.Length() > 0 {
|
|
value := valueSel.Text()
|
|
switch label {
|
|
case "Helyezés:":
|
|
rank := strings.TrimSuffix(value, ".")
|
|
rankInt, err := strconv.Atoi(rank)
|
|
if err != nil {
|
|
log.Printf("Skipping profile %d due to invalid rank: %s\n", id, rank)
|
|
return
|
|
}
|
|
mu.Lock()
|
|
lines = append(lines, Line{URL: url, SecondCol: rankInt})
|
|
mu.Unlock()
|
|
atomic.AddInt32(&processed, 1)
|
|
if atomic.LoadInt32(&processed)%writeBatch == 0 {
|
|
writeSortedOutput()
|
|
}
|
|
printProgress()
|
|
}
|
|
}
|
|
})
|
|
})
|
|
}
|
|
|
|
func printProgress() {
|
|
fmt.Printf("\rProcessed %d profiles...", atomic.LoadInt32(&processed))
|
|
}
|
|
|
|
func quicksort(lines []Line, low, high int) {
|
|
if low < high {
|
|
p := partition(lines, low, high)
|
|
quicksort(lines, low, p-1)
|
|
quicksort(lines, p+1, high)
|
|
}
|
|
}
|
|
|
|
func partition(lines []Line, low, high int) int {
|
|
pivot := lines[high].SecondCol
|
|
i := low - 1
|
|
for j := low; j < high; j++ {
|
|
if lines[j].SecondCol < pivot {
|
|
i++
|
|
lines[i], lines[j] = lines[j], lines[i]
|
|
}
|
|
}
|
|
lines[i+1], lines[high] = lines[high], lines[i+1]
|
|
return i + 1
|
|
}
|
|
|
|
func sortLinesQuick() {
|
|
if len(lines) > 1 {
|
|
quicksort(lines, 0, len(lines)-1)
|
|
}
|
|
}
|
|
|
|
func writeSortedOutput() {
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
|
|
sortLinesQuick()
|
|
|
|
file, err := os.Create(outputFile)
|
|
if err != nil {
|
|
log.Fatalf("Error creating output file: %v\n", err)
|
|
}
|
|
defer file.Close()
|
|
|
|
writer := csv.NewWriter(file)
|
|
defer writer.Flush()
|
|
|
|
for _, line := range lines {
|
|
if err := writer.Write([]string{line.URL, strconv.Itoa(line.SecondCol)}); err != nil {
|
|
log.Printf("Error writing line to output file: %v\n", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func main() {
|
|
if _, err := os.Stat(outputFile); err == nil {
|
|
var response string
|
|
fmt.Printf("Output file %s already exists. Overwrite? (yes/no): ", outputFile)
|
|
fmt.Scanln(&response)
|
|
if response != "yes" {
|
|
log.Println("Exiting. Please rename or remove the existing output file.")
|
|
return
|
|
}
|
|
err := os.Remove(outputFile)
|
|
if err != nil {
|
|
log.Fatalf("Failed to remove existing output file: %v\n", err)
|
|
}
|
|
}
|
|
|
|
fmt.Println("Scraping in progress...")
|
|
startTime := time.Now()
|
|
|
|
for i := startProfile; i <= endProfile; i++ {
|
|
wg.Add(1)
|
|
go fetchProfile(i)
|
|
|
|
if i%concurrency == 0 {
|
|
wg.Wait()
|
|
}
|
|
}
|
|
wg.Wait()
|
|
|
|
writeSortedOutput()
|
|
|
|
elapsedTime := time.Since(startTime)
|
|
fmt.Printf("\nScraping and sorting completed in %s\n", elapsedTime)
|
|
}
|