오래 못 할 짓 하지 않기

Go 언어 실습 3 코드 본문

4학년/캡스톤 (Capstone)

Go 언어 실습 3 코드

쫑알bot 2024. 12. 24. 16:56
728x90
package main

import (
	"encoding/csv"
	"fmt"
	"log"
	"net/http"
	"os"
	"strconv"

	"github.com/PuerkitoBio/goquery"
)

// baseURL is the search-results URL (search word "python") without a page
// number; callers append the page number to its trailing "recruitPage="
// query parameter.
var baseURL = "https://www.saramin.co.kr/zf_user/search/recruit?search_area=main&search_done=y&search_optional_item=n&searchType=search&searchword=python&recruitPage="

// extractedJob is one job posting scraped from a search-result card.
//
// id is the card's "value" attribute; title, location and jobsector are the
// link texts found under the card's ".job_tit", ".job_condition" and
// ".job_sector" elements respectively.
type extractedJob struct {
	id, title, location, jobsector string
}

func main() {
	var jobs []extractedJob
	c := make(chan []extractedJob)
	totalPages := getPages()
	fmt.Println(totalPages)

	for i := 0; i < totalPages; i++ {
		go getpage(i, c)
	}

	for i := 0; i < totalPages; i++ {
		extractedJobs := <-c
		jobs = append(jobs, extractedJobs...)
	}

	writeJobs(jobs)

	fmt.Println("Done, extracted", len(jobs))
}

// writeJobs writes a header row followed by one row per job to "jobs.csv"
// in the current working directory, creating or truncating the file.
//
// Any failure is passed to checkErr, which terminates the program.
func writeJobs(jobs []extractedJob) {
	file, err := os.Create("jobs.csv")
	checkErr(err)

	w := csv.NewWriter(file)

	headers := []string{"ID", "Title", "Location", "Jobsector"}
	checkErr(w.Write(headers))

	for _, job := range jobs {
		row := []string{job.id, job.title, job.location, job.jobsector}
		checkErr(w.Write(row))
	}

	// csv.Writer buffers its output; Flush itself returns nothing, so a failed
	// write to the underlying file is only visible through w.Error().
	w.Flush()
	checkErr(w.Error())

	// Close explicitly (rather than via defer) so that a close error is
	// reported instead of being silently dropped.
	checkErr(file.Close())
}

// getpage downloads one search-result page, extracts every ".item_recruit"
// card concurrently, and sends the collected jobs as a single slice on mainC.
//
// page is appended to baseURL as the page number. Request, status and parse
// failures are handed to checkErr / checkCode.
func getpage(page int, mainC chan<- []extractedJob) {
	pageURL := baseURL + strconv.Itoa(page)
	fmt.Println("Requsting " + pageURL)

	res, err := http.Get(pageURL)
	checkErr(err)
	checkCode(res)
	defer res.Body.Close()

	doc, err := goquery.NewDocumentFromReader(res.Body)
	checkErr(err)

	cards := doc.Find(".item_recruit")

	// One goroutine per card; each sends exactly one job on results.
	results := make(chan extractedJob)
	cards.Each(func(_ int, card *goquery.Selection) {
		go extractJob(card, results)
	})

	// Receive once per card so every extractor goroutine can finish.
	var collected []extractedJob
	for remaining := cards.Length(); remaining > 0; remaining-- {
		collected = append(collected, <-results)
	}
	mainC <- collected
}

// extractJob reads the id, title, location and job sector out of a single
// result card and sends them on c as one extractedJob.
func extractJob(card *goquery.Selection, c chan<- extractedJob) {
	// linkText returns the text of the <a> elements nested under selector.
	linkText := func(selector string) string {
		return card.Find(selector).Find("a").Text()
	}

	var job extractedJob
	// The "exists" flag from Attr is intentionally discarded.
	job.id, _ = card.Attr("value")
	job.title = linkText(".job_tit")
	job.location = linkText(".job_condition")
	job.jobsector = linkText(".job_sector")

	c <- job
}

// func cleanString(str string) string {
// return strings.Join(strings.Fields(strings.TrimSpace(str)), " ")
// }

// getPages fetches baseURL and returns the number of <a> elements found
// inside the ".pagination" element, which main uses as the page count.
// It returns 0 when no ".pagination" element is present.
func getPages() int {
	res, err := http.Get(baseURL)
	checkErr(err)
	checkCode(res)
	defer res.Body.Close()

	doc, err := goquery.NewDocumentFromReader(res.Body)
	checkErr(err)

	linkCount := 0
	// If several ".pagination" elements match, the last one wins.
	doc.Find(".pagination").Each(func(_ int, nav *goquery.Selection) {
		linkCount = nav.Find("a").Length()
	})
	return linkCount
}

// checkErr aborts the program when err is non-nil: log.Fatalln logs the error
// and then exits the process, so nothing placed after it would ever run.
// A nil err is a no-op.
func checkErr(err error) {
	if err != nil {
		log.Fatalln(err)
	}
}

// checkCode aborts the program via log.Fatalln unless the response status
// is 200 OK.
func checkCode(res *http.Response) {
	if res.StatusCode == http.StatusOK {
		return
	}
	log.Fatalln("Request failed with Status:", res.StatusCode)
}

'4학년 > 캡스톤 (Capstone)' 카테고리의 다른 글

Erasure Coding - Reed Solomon  (0) 2025.01.07
rclone 주요 기능 분석 ( flow 위주 )  (0) 2024.12.27
Go 언어 실습 2  (0) 2024.12.24
Go 언어 실습 1  (0) 2024.12.23
Go언어 정리  (0) 2024.12.23