Skip to content

Commit 06af507

Browse files
committed
add build hexo post
1 parent 14e362d commit 06af507

File tree

8 files changed

+413
-171
lines changed

8 files changed

+413
-171
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.idea/

.idea/workspace.xml

Lines changed: 232 additions & 48 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# 批量导出CSDN博客
2+
> 批量导出CSDN博客,并转化为Hexo博客样式
3+
4+
## 使用
5+
6+
```bash
7+
go run main.go -username 你的csdn用户名 -page 1
8+
```
9+
> page不写,默认为下载全部页

go.mod

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
11
module csdn-hexo
22

33
go 1.12
4+
5+
require (
6+
github.com/qianlnk/pgbar v0.0.0-20190929032005-46c23acad4ed
7+
github.com/qianlnk/to v0.0.0-20180426070425-a52c7fda1751 // indirect
8+
)

go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
github.com/qianlnk/pgbar v0.0.0-20190929032005-46c23acad4ed h1:/nQRgal0OAvl64fVVo0IrwlMt8vXypxc/a+N0Is80VY=
2+
github.com/qianlnk/pgbar v0.0.0-20190929032005-46c23acad4ed/go.mod h1:4YWkn3EVkh8c1BDlVmw+Zh2QLhs+MbAg4xy4RqcKMsA=
3+
github.com/qianlnk/to v0.0.0-20180426070425-a52c7fda1751 h1:3EYaPrwMGOaFxBbiLlsfRGFNlSLJ3ETjkPbTfkG5IGQ=
4+
github.com/qianlnk/to v0.0.0-20180426070425-a52c7fda1751/go.mod h1:HYAQIJIdgW9cGr75BDsucQMgKREt00mECJHOskH5n5k=

hexo.go

Lines changed: 0 additions & 1 deletion
This file was deleted.

main.go

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,163 @@
11
package main
2+
3+
import (
4+
"encoding/json"
5+
"flag"
6+
"fmt"
7+
"io/ioutil"
8+
"math/rand"
9+
"net/http"
10+
"os"
11+
"regexp"
12+
"strings"
13+
"sync"
14+
"time"
15+
16+
"github.com/qianlnk/pgbar"
17+
)
18+
19+
// Endpoints and templates used to export CSDN posts as Hexo Markdown files.
const (
	// ListPostURL is the format string for one page of a user's article
	// list; its verbs are the CSDN username (%s) and the page number (%d).
	ListPostURL = "https://blog.csdn.net/%s/article/list/%d?"

	// PostDetailURL is the format string for the editor endpoint queried for
	// a single article; its verb is the article id (%s).
	PostDetailURL = "https://mp.csdn.net/mdeditor/getArticle?id=%s"

	// HexoHeader is the front-matter template written at the top of every
	// exported post; its verbs are title, date, tags and categories, in that
	// order. The literal starts with "---" on its very first line, with no
	// blank line before it, so the delimiter is the first thing in the
	// generated file.
	HexoHeader = `---
title: %s
date: %s
tags: [%s]
categories: %s
---
`
)
32+
33+
// postTime is the date stamped on the next exported post. It is initialised
// to the program start time; buildPost formats it and then moves it back.
var postTime = time.Now()
34+
35+
type DetailData struct {
36+
Data PostDetail `json:"data"`
37+
}
38+
39+
// PostDetail holds the fields of a CSDN article that are decoded from the
// "data" object of the detail response.
type PostDetail struct {
	Title           string `json:"title"`           // front-matter title and base of the output file name
	Description     string `json:"description"`     // decoded but not used by the visible code
	Markdowncontent string `json:"markdowncontent"` // Markdown body written after the front matter
	Tags            string `json:"tags"`            // inserted verbatim between [ ] in the front matter
	Categories      string `json:"categories"`      // inserted verbatim as the front-matter categories
}
46+
47+
var (
48+
username string
49+
page int
50+
currentPage = 1
51+
count int
52+
wg sync.WaitGroup
53+
bar *pgbar.Bar
54+
)
55+
56+
func init() {
57+
flag.StringVar(&username, "username", "junmoxi", "your csdn username")
58+
flag.IntVar(&page, "page", -1, "download pages")
59+
flag.Parse()
60+
}
61+
62+
func main() {
63+
urls, err := crawlPosts(username)
64+
if err != nil {
65+
panic(err)
66+
}
67+
bar = pgbar.NewBar(0, "下载进度", len(urls))
68+
for _, url := range urls {
69+
wg.Add(1)
70+
go crawlPostMarkdown(url)
71+
}
72+
73+
wg.Wait()
74+
}
75+
76+
// Crawl posts by username
77+
func crawlPosts(username string) ([]string, error) {
78+
client := http.Client{}
79+
var (
80+
urls []string
81+
err error
82+
)
83+
84+
for {
85+
resp, err := client.Get(fmt.Sprintf(ListPostURL, username, currentPage))
86+
if err != nil {
87+
return nil, err
88+
}
89+
90+
data, err := ioutil.ReadAll(resp.Body)
91+
92+
r := regexp.MustCompile(`<h4 class="">\s*<a href=".*?"`)
93+
finds := r.FindAll(data, -1)
94+
95+
for _, f := range finds {
96+
ss := strings.Split(string(f), `"`)
97+
if len(ss) >= 4 {
98+
urls = append(urls, ss[3])
99+
}
100+
}
101+
102+
if len(finds) == 0 {
103+
return urls, nil
104+
}
105+
106+
if page != -1 && currentPage >= page {
107+
return urls, nil
108+
}
109+
currentPage++
110+
}
111+
112+
return urls, err
113+
}
114+
115+
func crawlPostMarkdown(url string) (*PostDetail, error) {
116+
index := strings.LastIndex(url, "/")
117+
id := url[index+1:]
118+
119+
client := http.Client{}
120+
121+
req, _ := http.NewRequest("GET", fmt.Sprintf(PostDetailURL, id), nil)
122+
req.Header.Set("cookie", "UserName=junmoxi; UserToken=de709e85392f4b8a8d19d69eb2273c56;")
123+
124+
resp, err := client.Do(req)
125+
if err != nil {
126+
return nil, err
127+
}
128+
129+
data, err := ioutil.ReadAll(resp.Body)
130+
if err != nil {
131+
return nil, err
132+
}
133+
134+
post := new(DetailData)
135+
err = json.Unmarshal(data, post)
136+
if err != nil {
137+
return nil, err
138+
}
139+
140+
go buildPost(post.Data)
141+
142+
return nil, nil
143+
}
144+
145+
func buildPost(post PostDetail) {
146+
147+
date := postTime.Format("2006-01-02 15:03:04")
148+
header := fmt.Sprintf(HexoHeader, post.Title, date, post.Tags, post.Categories)
149+
150+
ioutil.WriteFile(
151+
fmt.Sprintf("%s.md", post.Title),
152+
[]byte(fmt.Sprintf("%s\n%s", header, post.Markdowncontent)),
153+
os.ModePerm)
154+
155+
rand.Seed(time.Now().UnixNano())
156+
d := rand.Intn(3) + 1
157+
postTime = postTime.AddDate(0, 0, -d)
158+
159+
count++
160+
161+
defer bar.Add()
162+
defer wg.Done()
163+
}

spider.go

Lines changed: 0 additions & 122 deletions
This file was deleted.

0 commit comments

Comments
 (0)