这是我学完 Go by Example 和 https://tour.golang.org 之后写的第一个像样的 Go 程序。我有 Python 背景。
这个程序从 Wordnik(wordnik.com)抓取单词释义,然后在命令行中美观地打印出来。它的用途是在命令行中快速查一个单词。
我希望有人能审阅这段代码,指出其中效率低下的地方,尤其是代码中任何不符合 Go 惯用法、不能算作良好 Go 代码示例的部分。特别想请大家关注的一点是:在代码末尾,我使用了一个通道切片(slice of channels)来跟踪多个工作 goroutine。我很想听听大家对这种做法的意见。
package main
import (
"errors"
"fmt"
"github.com/PuerkitoBio/goquery"
"gopkg.in/gookit/color.v1"
"net/http"
"os"
"sort"
"strings"
"text/tabwriter"
)
// definition holds a single, context-free dictionary entry for a word.
type definition struct {
	// wordType is the part of speech: noun, verb, interjection,
	// intransitive verb, etc.
	wordType string
	// text is the actual definition itself.
	text string
}
// ctxDefinition pairs a definition with the context it was found in.
type ctxDefinition struct {
	// dict is the dictionary the definition comes from.
	dict string
	// rank is where this definition stands compared to the others.
	rank uint8
	// def is the definition itself.
	def definition
}
// byDictionary groups ctxDefinitions by dictionary and orders each group by rank.
//
// It returns a map with dictionary names as keys and definition slices as
// values. Within a slice, definitions are in ascending rank order; definitions
// with equal rank keep the relative order they had in cDs. The input slice is
// not modified.
func byDictionary(cDs []ctxDefinition) map[string][]definition {
	// Sort a copy so the caller's slice keeps its original order.
	ranked := make([]ctxDefinition, len(cDs))
	copy(ranked, cDs)

	// A stable sort makes the result deterministic when ranks collide.
	sort.SliceStable(ranked, func(i, j int) bool {
		return ranked[i].rank < ranked[j].rank
	})

	// Grouping an already rank-sorted sequence leaves every group rank-sorted.
	m := make(map[string][]definition)
	for _, cD := range ranked {
		m[cD.dict] = append(m[cD.dict], cD.def)
	}
	return m
}
// render formats the definition as "<wordType>\t<text>".
// When c is true the word type is rendered in italics; these are the
// opinionated defaults, use renderOps to choose the styles yourself.
func (d *definition) render(c bool) string {
	label := d.wordType
	if c {
		italic := color.New(color.OpItalic)
		label = italic.Render(label)
	}
	return label + "\t" + d.text
}
// renderOps formats the definition with caller-supplied styles: wordType is
// applied to the word type and text to the definition text. The two parts are
// separated by two tab characters.
func (d *definition) renderOps(wordType, text color.Style) string {
	styledType := wordType.Render(d.wordType)
	styledText := text.Render(d.text)
	return styledType + "\t\t" + styledText
}
// pprintCtxDefs pretty prints multiple context definitions to standard
// output, optionally with color.
//
// The definitions are grouped per dictionary with byDictionary and written
// through a tabwriter. Dictionaries are printed in alphabetical order so that
// repeated runs produce the same output. When c is true the dictionary name
// and the first definition of each dictionary are highlighted.
func pprintCtxDefs(cDs []ctxDefinition, c bool) {
	m := byDictionary(cDs)

	// Map iteration order is randomized; sort the names for stable output.
	dicts := make([]string, 0, len(m))
	for dict := range m {
		dicts = append(dicts, dict)
	}
	sort.Strings(dicts)

	w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
	for _, dict := range dicts {
		defs := m[dict]
		if c {
			fmt.Fprintln(w, color.New(color.BgGray).Render(dict))
			// Print first definition differently
			fmt.Fprintln(w, defs[0].renderOps(color.New(color.OpItalic, color.OpBold), color.New(color.Cyan)))
			for _, def := range defs[1:] {
				fmt.Fprintln(w, def.render(true))
			}
		} else {
			// Fprintln rather than Fprintf: the name is data, not a format
			// string, and may legitimately contain '%'.
			fmt.Fprintln(w, dict)
			for _, def := range defs {
				fmt.Fprintln(w, def.render(false))
			}
		}
		fmt.Fprintln(w)
	}
	w.Flush()
}
// wordnikLookup returns a slice of ctxDefinitions for the provided word.
// It looks the word up by scraping its page on wordnik.com with client.
//
// An error is returned when the request cannot be built or sent, when the
// response status is not 200, or when the HTML cannot be parsed. Definition
// lists without a matching dictionary heading, and list items without any
// definition text, are skipped instead of causing a panic.
func wordnikLookup(w string, client *http.Client) ([]ctxDefinition, error) {
	req, err := http.NewRequest(http.MethodGet, "https://www.wordnik.com/words/"+w, nil)
	if err != nil {
		// Report bad input to the caller rather than crashing the program.
		return nil, fmt.Errorf("building wordnik request for %q: %w", w, err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("couldn't connect to wordnik: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, errors.New("200 not returned, likely a non-word like '../test' was passed")
	}
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("malformed HTML from wordnik: %w", err)
	}

	ret := make([]ctxDefinition, 0)
	s := doc.Find(".word-module.module-definitions#define .guts.active").First()
	dicts := s.Find("h3")
	lists := s.Find("ul")
	// Go through each list of defs., then each def., and add them
	lists.Each(func(i int, list *goquery.Selection) {
		// The i-th list is paired with the i-th heading; without a heading
		// there is no dictionary name to file the definitions under.
		if i >= dicts.Length() {
			return
		}
		heading := dicts.Get(i).FirstChild
		if heading == nil {
			return
		}
		// dictionary: strip the "from " prefix and the ending period, then
		// capitalize the first letter. It is the same for every item of the
		// list, so compute it once here.
		d := strings.TrimPrefix(heading.Data, "from ")
		d = capitalizeFirst(strings.TrimSuffix(d, "."))

		list.Find("li").Each(func(j int, def *goquery.Selection) {
			// wordType
			wT := strings.TrimSpace(def.Find("abbr").First().Text() + " " + def.Find("i").First().Text())

			// definition text - remove the wordType at the beginning of the definition
			full := def.Text()
			if len(wT) > len(full) {
				return
			}
			t := strings.TrimSpace(full[len(wT):])
			if t == "" {
				return
			}

			// rank is a uint8; clamp instead of letting large indexes wrap to 0.
			rank := j
			if rank > 255 {
				rank = 255
			}
			ret = append(ret, ctxDefinition{
				dict: d,
				rank: uint8(rank),
				def: definition{
					wordType: wT,
					text:     capitalizeFirst(t),
				},
			})
		})
	})
	return ret, nil
}

// capitalizeFirst returns s with its first rune upper-cased. It works on
// runes, not bytes, so a multi-byte first letter is handled correctly; an
// empty string is returned unchanged.
func capitalizeFirst(s string) string {
	if s == "" {
		return s
	}
	r := []rune(s)
	return strings.ToUpper(string(r[0])) + string(r[1:])
}
func main() {
if len(os.Args) <= 1 {
fmt.Println("Provide a word to lookup.")
return
}
// TODO: Support multiple words concurrently
client := &http.Client{}
words := os.Args[1:]
// Lookup each word concurrently and store results
results := make([]chan []ctxDefinition, 0)
for i, word := range words {
results = append(results, make(chan []ctxDefinition))
go func(ind int, w string) {
defs, err := wordnikLookup(w, client)
if err != nil {
panic(err)
}
results[ind] <- defs
}(i, word)
}
// Print the answer of each word
for i, result := range results {
// TODO: Write to buffer, then flush after result comes in
color.New(color.BgRed, color.White).Println(words[i])
pprintCtxDefs(<-result, true)
}
}此代码是在GPL版本3下授权的。它将被上传到Github。任何想要重用或修改此代码的人都必须遵守该许可证。
发布于 2020-04-07 12:03:59
主函数的两个循环是有问题的。
在两个切片上使用索引,假设它们的长度相同,这是非常复杂的。
第一个循环是无界的,这意味着如果我传入大量单词,它就会启动大量 goroutine、发起大量请求等等。这肯定会给一些用户带来麻烦。
另外,第二个循环也不够理想:它并不是哪个结果最先返回就先输出哪个,而是始终等待切片中的第一项。也就是说,如果第一个请求由于某种原因很慢,那么其他可能更快返回的结果都要等到第一项完成之后才会显示。在并发编程中,这绝对不是我们想要的行为。
其余的代码还算可以,我没有再深入细看。
下面是您的 main 函数的更新版本,它用更符合惯用法的方式向 goroutine 传入数据(输入的单词)并从中取回数据(输出结果,包括可能的错误),所用的同步机制也更为常见。出于演示目的,它还将并发请求的数量限制为 4 个。
package main
import (
"errors"
"fmt"
"net/http"
"os"
"sort"
"strings"
"sync"
"text/tabwriter"
"github.com/PuerkitoBio/goquery"
"github.com/gookit/color"
)
// definition holds a single, context-free dictionary entry for a word.
type definition struct {
	// wordType is the part of speech: noun, verb, interjection,
	// intransitive verb, etc.
	wordType string
	// text is the actual definition itself.
	text string
}
// ctxDefinition pairs a definition with the context it was found in.
type ctxDefinition struct {
	// dict is the dictionary the definition comes from.
	dict string
	// rank is where this definition stands compared to the others.
	rank uint8
	// def is the definition itself.
	def definition
}
// byDictionary groups ctxDefinitions by dictionary and orders each group by rank.
//
// It returns a map with dictionary names as keys and definition slices as
// values. Within a slice, definitions are in ascending rank order; definitions
// with equal rank keep the relative order they had in cDs. The input slice is
// not modified.
func byDictionary(cDs []ctxDefinition) map[string][]definition {
	// Sort a copy so the caller's slice keeps its original order.
	ranked := make([]ctxDefinition, len(cDs))
	copy(ranked, cDs)

	// A stable sort makes the result deterministic when ranks collide.
	sort.SliceStable(ranked, func(i, j int) bool {
		return ranked[i].rank < ranked[j].rank
	})

	// Grouping an already rank-sorted sequence leaves every group rank-sorted.
	m := make(map[string][]definition)
	for _, cD := range ranked {
		m[cD.dict] = append(m[cD.dict], cD.def)
	}
	return m
}
// render formats the definition as "<wordType>\t<text>".
// When c is true the word type is rendered in italics; these are the
// opinionated defaults, use renderOps to choose the styles yourself.
func (d *definition) render(c bool) string {
	label := d.wordType
	if c {
		italic := color.New(color.OpItalic)
		label = italic.Render(label)
	}
	return label + "\t" + d.text
}
// renderOps formats the definition with caller-supplied styles: wordType is
// applied to the word type and text to the definition text. The two parts are
// separated by two tab characters.
func (d *definition) renderOps(wordType, text color.Style) string {
	styledType := wordType.Render(d.wordType)
	styledText := text.Render(d.text)
	return styledType + "\t\t" + styledText
}
// pprintCtxDefs pretty prints multiple context definitions to standard
// output, optionally with color.
//
// The definitions are grouped per dictionary with byDictionary and written
// through a tabwriter. Dictionaries are printed in alphabetical order so that
// repeated runs produce the same output. When c is true the dictionary name
// and the first definition of each dictionary are highlighted.
func pprintCtxDefs(cDs []ctxDefinition, c bool) {
	m := byDictionary(cDs)

	// Map iteration order is randomized; sort the names for stable output.
	dicts := make([]string, 0, len(m))
	for dict := range m {
		dicts = append(dicts, dict)
	}
	sort.Strings(dicts)

	w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
	for _, dict := range dicts {
		defs := m[dict]
		if c {
			fmt.Fprintln(w, color.New(color.BgGray).Render(dict))
			// Print first definition differently
			fmt.Fprintln(w, defs[0].renderOps(color.New(color.OpItalic, color.OpBold), color.New(color.Cyan)))
			for _, def := range defs[1:] {
				fmt.Fprintln(w, def.render(true))
			}
		} else {
			// Fprintln rather than Fprintf: the name is data, not a format
			// string, and may legitimately contain '%'.
			fmt.Fprintln(w, dict)
			for _, def := range defs {
				fmt.Fprintln(w, def.render(false))
			}
		}
		fmt.Fprintln(w)
	}
	w.Flush()
}
// wordnikLookup returns a slice of ctxDefinitions for the provided word.
// It looks the word up by scraping its page on wordnik.com with client.
//
// An error is returned when the request cannot be built or sent, when the
// response status is not 200, or when the HTML cannot be parsed. Definition
// lists without a matching dictionary heading, and list items without any
// definition text, are skipped instead of causing a panic.
func wordnikLookup(w string, client *http.Client) ([]ctxDefinition, error) {
	req, err := http.NewRequest(http.MethodGet, "https://www.wordnik.com/words/"+w, nil)
	if err != nil {
		// Building the request failed; nothing was sent yet, so do not
		// describe this as a connection problem.
		return nil, fmt.Errorf("building wordnik request for %q: %w", w, err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("couldn't connect to wordnik: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, errors.New("200 not returned, likely a non-word like '../test' was passed")
	}
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("malformed HTML from wordnik: %w", err)
	}

	ret := make([]ctxDefinition, 0)
	s := doc.Find(".word-module.module-definitions#define .guts.active").First()
	dicts := s.Find("h3")
	lists := s.Find("ul")
	// Go through each list of defs., then each def., and add them
	lists.Each(func(i int, list *goquery.Selection) {
		// The i-th list is paired with the i-th heading; without a heading
		// there is no dictionary name to file the definitions under.
		if i >= dicts.Length() {
			return
		}
		heading := dicts.Get(i).FirstChild
		if heading == nil {
			return
		}
		// dictionary: strip the "from " prefix and the ending period, then
		// capitalize the first letter. It is the same for every item of the
		// list, so compute it once here.
		d := strings.TrimPrefix(heading.Data, "from ")
		d = capitalizeFirst(strings.TrimSuffix(d, "."))

		list.Find("li").Each(func(j int, def *goquery.Selection) {
			// wordType
			wT := strings.TrimSpace(def.Find("abbr").First().Text() + " " + def.Find("i").First().Text())

			// definition text - remove the wordType at the beginning of the definition
			full := def.Text()
			if len(wT) > len(full) {
				return
			}
			t := strings.TrimSpace(full[len(wT):])
			if t == "" {
				return
			}

			// rank is a uint8; clamp instead of letting large indexes wrap to 0.
			rank := j
			if rank > 255 {
				rank = 255
			}
			ret = append(ret, ctxDefinition{
				dict: d,
				rank: uint8(rank),
				def: definition{
					wordType: wT,
					text:     capitalizeFirst(t),
				},
			})
		})
	})
	return ret, nil
}

// capitalizeFirst returns s with its first rune upper-cased. It works on
// runes, not bytes, so a multi-byte first letter is handled correctly; an
// empty string is returned unchanged.
func capitalizeFirst(s string) string {
	if s == "" {
		return s
	}
	r := []rune(s)
	return strings.ToUpper(string(r[0])) + string(r[1:])
}
// scrapRes is the outcome of looking up a single word.
type scrapRes struct {
	// word is the word that was looked up.
	word string
	// defs holds the definitions returned by the lookup.
	defs []ctxDefinition
	// err is the error returned by the lookup, if any.
	err error
}
// scrapWordnik is a worker loop. For every word received on input it calls
// wordnikLookup with client and sends exactly one scrapRes on output, carrying
// the word, its definitions and the lookup error (if any).
//
// It returns once input has been closed and drained. It never closes output;
// that is left to whoever coordinates the workers.
//
// The channel parameters are directional so the compiler rejects accidental
// sends on input or receives from output; bidirectional channels passed by
// existing callers convert implicitly.
func scrapWordnik(client *http.Client, input <-chan string, output chan<- scrapRes) {
	for w := range input {
		defs, err := wordnikLookup(w, client)
		output <- scrapRes{
			word: w,
			defs: defs,
			err:  err,
		}
	}
}
// main looks up every word given on the command line with a small pool of
// workers and prints each result as soon as it is available (so the output
// order may differ from the argument order).
//
// A failed lookup is reported on standard error; the process exits with
// status 1 if any lookup failed.
func main() {
	if len(os.Args) <= 1 {
		fmt.Println("Provide a word to lookup.")
		return
	}
	words := os.Args[1:]
	client := http.DefaultClient // prefer default http client if you are not configuring it.

	// maxWorkers bounds the number of simultaneous requests.
	const maxWorkers = 4
	workers := maxWorkers
	if len(words) < workers {
		workers = len(words)
	}

	// prepare async communication pipes
	input := make(chan string)
	output := make(chan scrapRes)

	// start async workers
	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			scrapWordnik(client, input, output)
		}()
	}
	// close output once every worker has returned, which ends the read loop below
	go func() {
		wg.Wait()
		close(output)
	}()

	// feed input communication pipe from its own goroutine: both channels are
	// unbuffered, so feeding from main before reading output would deadlock
	// as soon as every worker is blocked sending a result.
	go func() {
		defer close(input)
		for _, word := range words {
			input <- word
		}
	}()

	// read output to get results
	failed := false
	for r := range output {
		color.New(color.BgRed, color.White).Println(r.word)
		if r.err != nil {
			fmt.Fprintf(os.Stderr, "%s: %v\n", r.word, r.err)
			failed = true
			continue
		}
		pprintCtxDefs(r.defs, true)
	}
	if failed {
		os.Exit(1)
	}
}
```https://codereview.stackexchange.com/questions/240071
复制相似问题