本文整理匯總了Golang中github.com/miku/span/container.NewStringSet函數的典型用法代碼示例。如果您正苦於以下問題:Golang NewStringSet函數的具體用法?Golang NewStringSet怎麽用?Golang NewStringSet使用的例子?那麽, 這裏精選的函數代碼示例或許可以為您提供幫助。
在下文中一共展示了NewStringSet函數的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Golang代碼示例。
示例1: Languages
// Languages returns the distinct three-letter language codes for the article:
// the language declared on the abstract (when it parses) plus any language
// detected in the abstract, the translated abstract title and the body text.
// Detection is slow, so strings shorter than 20 bytes are not examined.
// Detection failures, the undetermined code "und" and codes that are not in
// acceptedLanguages are ignored.
func (article *Article) Languages() []string {
	found := container.NewStringSet()

	// Declared language of the abstract, if any.
	if declared := article.Front.Article.Abstract.Lang; declared != "" {
		if base, err := language.ParseBase(declared); err == nil {
			found.Add(base.ISO3())
		}
	}

	// Guessed languages of the free-text fields.
	candidates := []string{
		article.Front.Article.Abstract.Value,
		article.Front.Article.TranslatedAbstract.Title.Value,
		article.Body.Section.Value,
	}
	for _, text := range candidates {
		if len(text) >= 20 {
			code, err := span.DetectLang3(text)
			if err == nil && code != "und" && acceptedLanguages.Contains(code) {
				found.Add(code)
			}
		}
	}
	return found.Values()
}
示例2: ISSNList
// ISSNList returns the distinct substrings of the document's ISSN field that
// match span.ISSNPattern.
func (doc Document) ISSNList() []string {
	unique := container.NewStringSet()
	matches := span.ISSNPattern.FindAllString(doc.ISSN, -1)
	for i := range matches {
		unique.Add(matches[i])
	}
	return unique.Values()
}
示例3: MustLoadStringSet
// MustLoadStringSet loads the assets at the given paths via Asset and returns
// a set containing every non-empty line found in them, with surrounding
// whitespace trimmed. A final line that is not terminated by a newline is
// included as well. It panics if an asset cannot be loaded or read.
func MustLoadStringSet(paths ...string) *container.StringSet {
	s := container.NewStringSet()
	for _, path := range paths {
		b, err := Asset(path)
		if err != nil {
			panic(err)
		}
		rdr := bufio.NewReader(bytes.NewReader(b))
		for {
			line, err := rdr.ReadString('\n')
			if err != nil && err != io.EOF {
				panic(err)
			}
			// ReadString returns the data read so far together with io.EOF
			// when the input does not end in a newline, so the line has to be
			// handled before the loop is left.
			if line = strings.TrimSpace(line); line != "" {
				s.Add(line)
			}
			if err == io.EOF {
				break
			}
		}
	}
	return s
}
示例4: UnmarshalJSON
// UnmarshalJSON configures the collection filter from a JSON fragment whose
// "collection" key holds a list of collection names. The names replace the
// filter's current values.
func (f *CollectionFilter) UnmarshalJSON(p []byte) error {
	var fragment struct {
		Collections []string `json:"collection"`
	}
	err := json.Unmarshal(p, &fragment)
	if err != nil {
		return err
	}
	names := container.NewStringSet(fragment.Collections...)
	f.values = *names
	return nil
}
示例5: Tags
// Tags returns the distinct ISILs for which at least one of the configured
// filters applies to the given intermediate schema record. Every filter is
// evaluated, even after an ISIL has already matched.
func (t ISILTagger) Tags(is finc.IntermediateSchema) []string {
	matched := container.NewStringSet()
	for label, rules := range t {
		for _, rule := range rules {
			if !rule.Apply(is) {
				continue
			}
			matched.Add(label)
		}
	}
	return matched.Values()
}
示例6: Languages
// Languages returns the distinct three-letter language codes taken from the
// article's custom meta entries named "lang". Values that cannot be parsed as
// a base language are ignored.
func (article *Article) Languages() []string {
	codes := container.NewStringSet()
	for _, meta := range article.Front.Article.CustomMetaGroup.CustomMeta {
		if meta.Name.Value != "lang" {
			continue
		}
		if base, err := language.ParseBase(meta.Value.Value); err == nil {
			codes.Add(base.ISO3())
		}
	}
	return codes.Values()
}
示例7: Convert
// Convert populates the solr 4/13 schema document s from the intermediate
// schema record is. Slice fields such as Formats, Series and Languages are
// appended to, not reset. The returned error is always nil.
func (s *Solr4Vufind13v1) Convert(is finc.IntermediateSchema) error {
	// Fields copied or derived directly from the record.
	s.Allfields = is.Allfields()
	s.Formats = append(s.Formats, is.Format)
	s.Fullrecord = "blob:" + is.RecordID
	s.Fulltext = is.Fulltext
	s.HierarchyParentTitle = append(s.HierarchyParentTitle, is.JournalTitle)
	s.ID = is.RecordID
	s.Imprint = is.Imprint()
	s.ISSN = is.ISSNList()
	s.MegaCollections = append(s.MegaCollections, is.MegaCollection)
	s.PublishDateSort = is.Date.Year()
	s.Publishers = is.Publishers
	s.RecordType = finc.AIRecordType
	s.Series = append(s.Series, is.JournalTitle)
	s.SourceID = is.SourceID
	s.Subtitle = is.ArticleSubtitle
	s.TitleSort = is.SortableTitle()
	s.Topics = is.Subjects
	s.URL = is.URL

	// Map each subject to its classes and keep the distinct ones.
	facets := container.NewStringSet()
	for _, subject := range is.Subjects {
		mapped := SubjectMapping.LookupDefault(subject, []string{})
		for _, class := range mapped {
			facets.Add(class)
		}
	}
	s.FincClassFacet = facets.Values()

	// All three title variants share the sanitized article title.
	title := sanitize.HTML(is.ArticleTitle)
	s.Title = title
	s.TitleFull = title
	s.TitleShort = title

	// Languages without a mapping are passed through unchanged.
	for _, code := range is.Languages {
		s.Languages = append(s.Languages, LanguageMap.LookupDefault(code, code))
	}

	for _, author := range is.Authors {
		name := author.String()
		s.SecondaryAuthors = append(s.SecondaryAuthors, name)
		s.AuthorFacet = append(s.AuthorFacet, name)
	}
	// The first secondary author doubles as the main author.
	if len(s.SecondaryAuthors) > 0 {
		s.Author = s.SecondaryAuthors[0]
	}

	s.AccessFacet = AIAccessFacet
	s.FormatDe15 = []string{FormatDe15.LookupDefault(is.Format, "")}
	return nil
}
示例8: NewListFilter
// NewListFilter reads one record per line from r and returns a ListFilter
// whose set contains them. Lines are trimmed of surrounding whitespace and
// empty lines are ignored. A final line that is not terminated by a newline is
// included as well. If reading fails, the filter built so far is returned
// together with the error.
func NewListFilter(r io.Reader) (ListFilter, error) {
	br := bufio.NewReader(r)
	f := ListFilter{Set: container.NewStringSet()}
	for {
		line, err := br.ReadString('\n')
		if err != nil && err != io.EOF {
			return f, err
		}
		// ReadString returns the data read so far together with io.EOF when
		// the input does not end in a newline, so the line has to be handled
		// before returning.
		if line = strings.TrimSpace(line); line != "" {
			f.Set.Add(line)
		}
		if err == io.EOF {
			return f, nil
		}
	}
}
示例9: Languages
// Languages returns the distinct languages detected in the document's title
// and text. Detection is slow, so strings shorter than 20 bytes are not
// examined. Detection failures, codes that are not in acceptedLanguages and
// the undetermined code "und" are ignored.
func (doc Document) Languages() []string {
	detected := container.NewStringSet()
	for _, text := range []string{doc.Title, doc.Text} {
		if len(text) >= 20 {
			code, err := span.DetectLang3(text)
			if err == nil && acceptedLanguages.Contains(code) && code != "und" {
				detected.Add(code)
			}
		}
	}
	return detected.Values()
}
示例10:
"time"
"github.com/miku/span"
"github.com/miku/span/container"
"github.com/miku/span/finc"
"golang.org/x/text/language"
)
// Sentinel errors of this package.
var (
// errNoDOI is the error value for a missing DOI.
errNoDOI = errors.New("DOI is missing")
// errNotImplemented is the error value for functionality that is not implemented.
errNotImplemented = errors.New("not implemented")
)
var (
// Restricts the possible languages for detection.
acceptedLanguages = container.NewStringSet("deu", "eng", "fra", "ita", "spa")
// Candidate patterns for parsing publishing dates.
datePatterns = []string{
"2006",
"2006-",
"2006-1",
"2006-01",
"2006-1-2",
"2006-1-02",
"2006-01-2",
"2006-01-02",
"2006-Jan",
"2006-January",
"2006-Jan-2",
"2006-Jan-02",
示例11: Convert
// Export method from intermediate schema to solr 4/13 schema.
func (s *Solr5Vufind3) Convert(is finc.IntermediateSchema, withFullrecord bool) error {
s.Allfields = is.Allfields()
s.Formats = append(s.Formats, is.Format)
s.Fullrecord = "blob:" + is.RecordID
s.Fulltext = is.Fulltext
s.ID = is.RecordID
s.Imprint = is.Imprint()
s.ISSN = is.ISSNList()
s.MegaCollections = append(s.MegaCollections, is.MegaCollection)
s.PublishDateSort = is.Date.Year()
s.PublishDate = []string{is.Date.Format("2006-01-02")}
s.Publishers = is.Publishers
s.RecordType = finc.AIRecordType
s.Series = append(s.Series, is.JournalTitle)
s.SourceID = is.SourceID
s.Subtitle = is.ArticleSubtitle
s.TitleSort = is.SortableTitle()
s.Topics = is.Subjects
s.URL = is.URL
classes := container.NewStringSet()
for _, s := range is.Subjects {
for _, class := range SubjectMapping.LookupDefault(s, []string{}) {
classes.Add(class)
}
}
s.FincClassFacet = classes.Values()
sanitized := sanitize.HTML(is.ArticleTitle)
s.Title, s.TitleFull, s.TitleShort = sanitized, sanitized, sanitized
// is we do not have a title yet be rft.btitle is non-empty, use that
if s.Title == "" && is.BookTitle != "" {
sanitized := sanitize.HTML(is.BookTitle)
s.Title, s.TitleFull, s.TitleShort = sanitized, sanitized, sanitized
}
for _, lang := range is.Languages {
s.Languages = append(s.Languages, LanguageMap.LookupDefault(lang, lang))
}
// collect sanizized authors
var authors []string
for _, author := range is.Authors {
sanitized := AuthorReplacer.Replace(author.String())
if sanitized == "" {
continue
}
authors = append(authors, sanitized)
// first, random author goes into author field, others into secondary field, refs. #5778
if s.VF1Author == "" {
s.VF1Author = sanitized
} else {
s.VF1SecondaryAuthors = append(s.VF1SecondaryAuthors, sanitized)
}
s.AuthorFacet = append(s.AuthorFacet, sanitized)
}
if s.VF1Author == "" {
s.VF1Author = finc.NOT_ASSIGNED
}
if len(authors) == 0 {
s.Authors = []string{finc.NOT_ASSIGNED}
} else {
s.Authors = authors
}
s.AccessFacet = AIAccessFacet
// site specific formats
s.FormatDe105 = []string{FormatDe105.LookupDefault(is.Format, "")}
s.FormatDe14 = []string{FormatDe14.LookupDefault(is.Format, "")}
s.FormatDe15 = []string{FormatDe15.LookupDefault(is.Format, "")}
s.FormatDe520 = []string{FormatDe520.LookupDefault(is.Format, "")}
s.FormatDe540 = []string{FormatDe540.LookupDefault(is.Format, "")}
s.FormatDeCh1 = []string{FormatDeCh1.LookupDefault(is.Format, "")}
s.FormatDed117 = []string{FormatDed117.LookupDefault(is.Format, "")}
s.FormatDeGla1 = []string{FormatDeGla1.LookupDefault(is.Format, "")}
s.FormatDel152 = []string{FormatDel152.LookupDefault(is.Format, "")}
s.FormatDel189 = []string{FormatDel189.LookupDefault(is.Format, "")}
s.FormatDeZi4 = []string{FormatDeZi4.LookupDefault(is.Format, "")}
s.FormatDeZwi2 = []string{FormatDeZwi2.LookupDefault(is.Format, "")}
s.FormatNrw = []string{FormatNrw.LookupDefault(is.Format, "")}
s.ContainerVolume = is.Volume
s.ContainerIssue = is.Issue
s.ContainerStartPage = is.StartPage
s.ContainerTitle = is.JournalTitle
s.Institutions = is.Labels
if withFullrecord {
// refs. #8031
b, err := json.Marshal(is)
if err != nil {
return err
}
//.........這裏部分代碼省略.........
示例12: Iterate
RawDate string `xml:"Date"`
Volume string `xml:"Volume"`
Issue string `xml:"Issue"`
RawAuthors []string `xml:"Authors>Author"`
Language string `xml:"Language"`
Abstract string `xml:"Abstract"`
Descriptors string `xml:"Descriptors>Descriptor"`
Text string `xml:"Text"`
XGroup string `xml:"x-group"`
XIssue string `xml:"x-issue"`
}
var (
// rawDateReplacer removes double quotes, newlines and tabs from a string.
// NOTE(review): presumably applied to the raw date field before parsing;
// confirm at the call site.
rawDateReplacer = strings.NewReplacer(`"`, "", "\n", "", "\t", "")
// acceptedLanguages restricts the possible languages for detection.
acceptedLanguages = container.NewStringSet("deu", "eng")
// dbmap maps a database name to one or more "package names"; it is loaded
// through assetutil from assets/genios/dbmap.json.
dbmap = assetutil.MustLoadStringSliceMap("assets/genios/dbmap.json")
)
// Genios is an empty struct that carries the Iterate method.
type Genios struct{}

// Iterate passes r to span.FromXML together with the element name "Document"
// and a callback that decodes one element into a Document and returns it as a
// span.Importer, along with any decoding error.
func (s Genios) Iterate(r io.Reader) (<-chan []span.Importer, error) {
	decode := func(dec *xml.Decoder, start xml.StartElement) (span.Importer, error) {
		record := new(Document)
		err := dec.DecodeElement(&record, &start)
		return record, err
	}
	return span.FromXML(r, "Document", decode)
}
示例13: ReadEntries
// ReadEntries reads rows until io.EOF and returns the licenses found, grouped
// by identifier. Identifiers are the trimmed print and online identifiers of
// a row plus any ISSN found in its anchor field.
//
// Rows failing with ErrMissingIdentifiers, ErrIncompleteLine or
// ErrInvalidEmbargo are logged and skipped when the corresponding Skip* option
// is set; otherwise the error is returned. Any other read error is returned as
// well. On error, the entries collected so far are returned alongside it.
func (r *Reader) ReadEntries() (holdings.Entries, error) {
	entries := make(holdings.Entries)
	for {
		cols, entry, err := r.Read()
		if err == io.EOF {
			break
		}
		switch err {
		case nil:
			// Row was read without problems.
		case ErrMissingIdentifiers:
			if !r.SkipMissingIdentifiers {
				return entries, err
			}
			log.Println("skipping line with missing identifiers")
			continue
		case ErrIncompleteLine:
			if !r.SkipIncompleteLines {
				return entries, err
			}
			log.Println("skipping incomplete line")
			continue
		case ErrInvalidEmbargo:
			if !r.SkipInvalidEmbargo {
				return entries, err
			}
			log.Println("skipping invalid embargo")
			continue
		default:
			// Do not process the zero-valued row of an unexpected failure; a
			// persistent reader error would otherwise be ignored forever.
			return entries, err
		}

		pi := strings.TrimSpace(cols.PrintIdentifier)
		oi := strings.TrimSpace(cols.OnlineIdentifier)
		// Slight ISSN restoration (e.g. http://www.jstor.org/kbart/collections/as).
		if len(pi) == 8 {
			pi = pi[:4] + "-" + pi[4:]
		}
		if len(oi) == 8 {
			oi = oi[:4] + "-" + oi[4:]
		}

		// Collect all identifiers.
		identifiers := container.NewStringSet()
		if pi != "" {
			identifiers.Add(pi)
		}
		if oi != "" {
			identifiers.Add(oi)
		}
		// Extract ISSN from anchor field.
		for _, issn := range span.ISSNPattern.FindAllString(cols.Anchor, -1) {
			identifiers.Add(issn)
		}
		if identifiers.Size() == 0 && !r.SkipMissingIdentifiers {
			return entries, ErrMissingIdentifiers
		}

		// The same license is registered under each of its identifiers.
		for _, id := range identifiers.Values() {
			entries[id] = append(entries[id], holdings.License(entry))
		}
	}
	return entries, nil
}
示例14: ToIntermediateSchema
// ToIntermediateSchema converts a doaj document to intermediate schema. A
// span.Skip error is returned, together with the partially filled record, when
// the document has no usable date or when the generated record id exceeds
// span.KeyLengthLimit.
func (doc Document) ToIntermediateSchema() (*finc.IntermediateSchema, error) {
	var err error
	output := finc.NewIntermediateSchema()

	output.Date, err = doc.Date()
	if err != nil {
		return output, span.Skip{Reason: err.Error()}
	}
	output.RawDate = output.Date.Format("2006-01-02")

	recordID := fmt.Sprintf("ai-%s-%s", SourceID, doc.ID)
	if len(recordID) > span.KeyLengthLimit {
		return output, span.Skip{Reason: fmt.Sprintf("id too long: %s", recordID)}
	}
	output.RecordID = recordID

	// Fixed, source-wide values.
	output.Genre = Genre
	output.DOI = doc.DOI()
	output.Format = Format
	output.MegaCollection = Collection
	output.SourceID = SourceID

	// Bibliographic fields.
	output.ISSN = doc.Index.ISSN
	output.ArticleTitle = doc.BibJson.Title
	output.JournalTitle = doc.BibJson.Journal.Title
	output.Volume = doc.BibJson.Journal.Volume
	output.Publishers = append(output.Publishers, doc.BibJson.Journal.Publisher)
	for _, link := range doc.BibJson.Link {
		output.URL = append(output.URL, link.URL)
	}

	// Page count and range are only set when both page values are numeric.
	output.StartPage = doc.BibJson.StartPage
	output.EndPage = doc.BibJson.EndPage
	first, firstErr := strconv.Atoi(doc.BibJson.StartPage)
	last, lastErr := strconv.Atoi(doc.BibJson.EndPage)
	if firstErr == nil && lastErr == nil {
		output.PageCount = strconv.Itoa(last - first)
		output.Pages = fmt.Sprintf("%d-%d", first, last)
	}

	// Subjects: schema codes mapped through LCCPatterns; unmapped codes are dropped.
	classes := container.NewStringSet()
	for _, code := range doc.Index.SchemaCode {
		key := strings.Replace(code, "LCC:", "", -1)
		class := LCCPatterns.LookupDefault(key, finc.NOT_ASSIGNED)
		if class == finc.NOT_ASSIGNED {
			continue
		}
		classes.Add(class)
	}
	output.Subjects = []string{finc.NOT_ASSIGNED}
	if classes.Size() != 0 {
		output.Subjects = classes.SortedValues()
	}

	// Languages without a mapping become "und".
	langs := container.NewStringSet()
	for _, name := range doc.Index.Language {
		langs.Add(LanguageMap.LookupDefault(name, "und"))
	}
	output.Languages = langs.Values()

	for _, author := range doc.BibJson.Author {
		output.Authors = append(output.Authors, finc.Author{Name: author.Name})
	}
	return output, nil
}
示例15: main
func main() {
filename := flag.String("file", "", "path to holdings file")
format := flag.String("format", "kbart", "holding file format, kbart, google, ovid")
permissiveMode := flag.Bool("permissive", false, "if we cannot check, we allow")
ignoreUnmarshalErrors := flag.Bool("ignore-unmarshal-errors", false, "keep using what could be unmarshalled")
version := flag.Bool("version", false, "show version")
flag.Parse()
if *version {
fmt.Println(istools.Version)
os.Exit(0)
}
if *filename == "" {
log.Fatal("holding -file required")
}
var r *bufio.Reader
if flag.NArg() == 0 {
r = bufio.NewReader(os.Stdin)
} else {
file, err := os.Open(flag.Arg(0))
if err != nil {
log.Fatal(err)
}
r = bufio.NewReader(file)
}
hfile, err := os.Open(*filename)
if err != nil {
log.Fatal(err)
}
var hr holdings.File
switch *format {
case "kbart":
hr = kbart.NewReader(hfile)
case "ovid":
hr = ovid.NewReader(hfile)
case "google":
hr = google.NewReader(hfile)
default:
log.Fatalf("invalid holding file format: %s", *format)
}
entries, err := hr.ReadAll()
if err != nil {
switch err.(type) {
case holdings.ParseError:
if *ignoreUnmarshalErrors {
log.Println(err)
} else {
log.Fatal(err)
}
default:
log.Fatal(err)
}
}
for {
b, err := r.ReadBytes('\n')
if err == io.EOF {
break
}
if err != nil {
log.Fatal(err)
}
var is finc.IntermediateSchema
if err := json.Unmarshal(b, &is); err != nil {
log.Fatal(err)
}
signature := holdings.Signature{
Date: is.Date.Format("2006-01-02"),
Volume: is.Volume,
Issue: is.Issue,
}
// validate record, if at least one license allows this item
var valid bool
var messages = container.NewStringSet()
LOOP:
for _, issn := range append(is.ISSN, is.EISSN...) {
licenses := entries.Licenses(issn)
if len(licenses) == 0 {
messages.Add(fmt.Sprintf("ISSN not in holdings"))
}
if len(licenses) == 0 && *permissiveMode {
messages.Add("PERMISSIVE_OK")
valid = true
break LOOP
}
for _, license := range licenses {
if err := license.Covers(signature); err != nil {
messages.Add(err.Error())
//.........這裏部分代碼省略.........