本文整理匯總了Golang中github.com/henrylee2cn/pholcus/common/util.FileNameReplace函數的典型用法代碼示例。如果您正苦於以下問題:Golang FileNameReplace函數的具體用法?Golang FileNameReplace怎麽用?Golang FileNameReplace使用的例子?那麽,這裏精選的函數代碼示例或許可以為您提供幫助。
在下文中一共展示了FileNameReplace函數的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Golang代碼示例。
示例1: init
// init registers the "mgo" (MongoDB) output backend in the Output table.
func init() {
	// The handler groups one batch of collected cells by sub-namespace and
	// inserts every group into the collection "<namespace>__<subNamespace>".
	Output["mgo"] = func(self *Collector, dataIndex int) {
		// Borrow a pooled session and hand it back once the batch is written.
		session := mgo.MgoPool.GetOne().(*mgo.MgoSrc)
		defer mgo.MgoPool.Free(session)

		var (
			database  = session.DB(config.MGO.DB)
			namespace = util.FileNameReplace(self.namespace())
			targets   = make(map[string]*mgov2.Collection)
			pending   = make(map[string][]interface{})
		)

		for _, cell := range self.DockerQueue.Dockers[dataIndex] {
			sub := util.FileNameReplace(self.subNamespace(cell))
			if _, known := targets[sub]; !known {
				targets[sub] = database.C(namespace + "__" + sub)
			}

			// Flatten the nested "Data" payload into the document itself, then
			// drop the bookkeeping keys that should not be stored.
			payload := cell["Data"].(map[string]interface{})
			for field, value := range payload {
				cell[field] = value
			}
			delete(cell, "Data")
			delete(cell, "RuleName")

			pending[sub] = append(pending[sub], cell)
		}

		// One bulk insert per collection; failures are logged and skipped.
		for sub, docs := range pending {
			if err := targets[sub].Insert(docs...); err != nil {
				logs.Log.Error("%v", err)
			}
		}
	}
}
示例2: New
// New builds a Historier holding an empty success record and an empty failure
// record. All four identifiers share the tail "__<name>", extended with
// "__<subName>" when subName is non-empty. Only the table names are passed
// through util.FileNameReplace; the file names are used as concatenated.
func New(name string, subName string) Historier {
	tail := "__" + name
	if subName != "" {
		tail += "__" + subName
	}

	success := &Success{
		tabName:  util.FileNameReplace(SUCCESS_SUFFIX + tail),
		fileName: SUCCESS_FILE + tail,
		new:      make(map[string]bool),
		old:      make(map[string]bool),
	}
	failure := &Failure{
		tabName:  util.FileNameReplace(FAILURE_SUFFIX + tail),
		fileName: FAILURE_FILE + tail,
		list:     make(map[string]*request.Request),
	}
	return &History{
		Success: success,
		Failure: failure,
	}
}
示例3: init
// init registers the "mgo" (MongoDB) output backend in the Output table.
func init() {
	// The handler writes the batch selected by dataIndex to MongoDB. It returns
	// an error when mgo.Error() reports a connection problem, otherwise whatever
	// mgo.Call returns; the callback itself always returns nil because failed
	// inserts are only logged.
	Output["mgo"] = func(self *Collector, dataIndex int) error {
		// Check the database connection first.
		if mgo.Error() != nil {
			// The message previously misspelled the product name as "MongoBD".
			return fmt.Errorf("MongoDB數據庫鏈接失敗: %v", mgo.Error())
		}
		return mgo.Call(func(src pool.Src) error {
			var (
				db          = src.(*mgo.MgoSrc).DB(config.DB_NAME)
				namespace   = util.FileNameReplace(self.namespace())
				collections = make(map[string]*mgov2.Collection)
				dataMap     = make(map[string][]interface{})
				err         error
			)

			// Group the cells by sub-namespace; each group goes to the collection
			// "<namespace>" or "<namespace>__<subNamespace>".
			for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
				subNamespace := util.FileNameReplace(self.subNamespace(datacell))
				var cName = namespace
				if subNamespace != "" {
					cName += "__" + subNamespace
				}
				if _, ok := collections[subNamespace]; !ok {
					collections[subNamespace] = db.C(cName)
				}
				// Flatten the nested "Data" payload into the document itself.
				for k, v := range datacell["Data"].(map[string]interface{}) {
					datacell[k] = v
				}
				delete(datacell, "Data")
				delete(datacell, "RuleName")
				// Strip the default fields when the spider does not want them.
				if !self.Spider.OutDefaultField() {
					delete(datacell, "Url")
					delete(datacell, "ParentUrl")
					delete(datacell, "DownloadTime")
				}
				dataMap[subNamespace] = append(dataMap[subNamespace], datacell)
			}

			// Insert each group in chunks of at most mgo.MaxLen documents.
			for collection, docs := range dataMap {
				c := collections[collection]
				count := len(docs)
				loop := count / mgo.MaxLen
				for i := 0; i < loop; i++ {
					err = c.Insert(docs[i*mgo.MaxLen : (i+1)*mgo.MaxLen]...)
					if err != nil {
						logs.Log.Error("%v", err)
					}
				}
				// Nothing is left over when count is an exact multiple of MaxLen.
				if count%mgo.MaxLen == 0 {
					continue
				}
				err = c.Insert(docs[loop*mgo.MaxLen:]...)
				if err != nil {
					logs.Log.Error("%v", err)
				}
			}
			return nil
		})
	}
}
示例4: outputFile
// outputFile writes the bytes stored in file["Bytes"] to
// <config.FILE_DIR>/<namespace>/<file["Name"]> and logs the outcome.
// Regardless of the result, the cell is returned via data.PutFileCell and
// self.wait.Done is called.
func (self *Collector) outputFile(file data.FileCell) {
	// Recycle the FileCell and release the wait group on every exit path.
	defer func() {
		data.PutFileCell(file)
		self.wait.Done()
	}()

	// Split the cleaned name into its directory part and its base name.
	relDir, base := filepath.Split(filepath.Clean(file["Name"].(string)))
	dir := filepath.Join(config.FILE_DIR, util.FileNameReplace(self.namespace()), relDir)
	fileName := filepath.Join(dir, util.FileNameReplace(base))

	// logFailure reports an error that happened before anything was written.
	logFailure := func(cause error) {
		logs.Log.Error(
			" * Fail [文件下載:%v | KEYIN:%v | 批次:%v] %v [ERROR] %v\n",
			self.Spider.GetName(), self.Spider.GetKeyin(), atomic.LoadUint64(&self.fileBatch), fileName, cause,
		)
	}

	// Create the directory unless it already exists as a directory.
	if info, statErr := os.Stat(dir); statErr != nil || !info.IsDir() {
		if mkErr := os.MkdirAll(dir, 0777); mkErr != nil {
			logFailure(mkErr)
			return
		}
	}

	// Create the file with mode 0777 when missing, otherwise truncate it.
	out, openErr := os.OpenFile(fileName, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0777)
	if openErr != nil {
		logFailure(openErr)
		return
	}

	written, copyErr := io.Copy(out, bytes.NewReader(file["Bytes"].([]byte)))
	out.Close()
	if copyErr != nil {
		logs.Log.Error(
			" * Fail [文件下載:%v | KEYIN:%v | 批次:%v] %v (%s) [ERROR] %v\n",
			self.Spider.GetName(), self.Spider.GetKeyin(), atomic.LoadUint64(&self.fileBatch), fileName, bytesSize.Format(uint64(written)), copyErr,
		)
		return
	}

	// Update the output statistics.
	self.addFileSum(1)

	// Print the report.
	logs.Log.Informational(" * ")
	logs.Log.App(
		" * [文件下載:%v | KEYIN:%v | 批次:%v] %v (%s)\n",
		self.Spider.GetName(), self.Spider.GetKeyin(), atomic.LoadUint64(&self.fileBatch), fileName, bytesSize.Format(uint64(written)),
	)
	logs.Log.Informational(" * ")
}
示例5: init
// init registers the "mysql" output backend in the Output table.
func init() {
	// The handler writes the batch selected by dataIndex into MySQL, using one
	// table per sub-namespace named "<namespace>" or "<namespace>__<subNamespace>".
	Output["mysql"] = func(self *Collector, dataIndex int) {
		conn, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc)
		if !ok || conn == nil {
			logs.Log.Error("鏈接Mysql數據庫超時,無法輸出!")
			return
		}
		defer mysql.MysqlPool.Free(conn)

		tables := make(map[string]*mysql.MyTable)
		namespace := util.FileNameReplace(self.namespace())

		for _, cell := range self.DockerQueue.Dockers[dataIndex] {
			sub := util.FileNameReplace(self.subNamespace(cell))
			tableName := namespace
			if sub != "" {
				tableName += "__" + sub
			}

			// First cell of a sub-namespace: define and create its table.
			table, exists := tables[sub]
			if !exists {
				table = mysql.New(conn.DB)
				tables[sub] = table
				table.SetTableName(tableName)
				for _, column := range self.MustGetRule(cell["RuleName"].(string)).ItemFields {
					table.AddColumn(column + ` MEDIUMTEXT`)
				}
				table.
					AddColumn(`Url VARCHAR(255)`, `ParentUrl VARCHAR(255)`, `DownloadTime VARCHAR(50)`).
					Create()
			}

			// Strings (and missing values) are stored as-is; anything else is
			// stored as its JSON text.
			for _, column := range self.MustGetRule(cell["RuleName"].(string)).ItemFields {
				values := cell["Data"].(map[string]interface{})
				text, isString := values[column].(string)
				if !isString && values[column] != nil {
					text = util.JsonString(values[column])
				}
				table.AddRow(text)
			}

			err := table.
				AddRow(cell["Url"].(string), cell["ParentUrl"].(string), cell["DownloadTime"].(string)).
				Update()
			util.CheckErr(err)
		}
	}
}
示例6: SaveFile
// SaveFile drains self.FileChan and writes every received file cell to
// <config.FILE_DIR>/<namespace>/<file["Name"]>.
//
// It keeps polling, yielding with runtime.Gosched while the channel is empty,
// until self.CtrlLen() is 0 and the channel holds no more cells. A cell is only
// counted via addFileSum and reported as downloaded when the directory could be
// created and the body was fully written; otherwise the error is logged.
// outCount[2]/outCount[3] are incremented and the cell is recycled either way.
func (self *Collector) SaveFile() {
	for !(self.CtrlLen() == 0 && len(self.FileChan) == 0) {
		select {
		case file := <-self.FileChan:
			self.outCount[2]++

			// Split the cleaned name into its directory part and its base name.
			p, n := filepath.Split(filepath.Clean(file["Name"].(string)))
			dir := filepath.Join(config.FILE_DIR, util.FileNameReplace(self.namespace()), p)
			fileName := filepath.Join(dir, util.FileNameReplace(n))
			body := file["Body"].(io.ReadCloser)

			// MkdirAll does nothing when dir already exists as a directory.
			var size int64
			err := os.MkdirAll(dir, 0777)
			if err == nil {
				// Create the file with mode 0777 when missing, otherwise truncate it.
				var f *os.File
				f, err = os.OpenFile(fileName, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0777)
				if err == nil {
					size, err = io.Copy(f, body)
					// A failed Close can mean the data never reached the disk.
					if closeErr := f.Close(); err == nil {
						err = closeErr
					}
				}
			}
			body.Close()

			if err != nil {
				logs.Log.Error(" * [任務:%v | KEYIN:%v] 下載文件失敗: %v [ERROR] %v\n",
					self.Spider.GetName(), self.Spider.GetKeyin(), fileName, err)
			} else {
				// Update the output statistics.
				self.addFileSum(1)
				// Print the report.
				logs.Log.Informational(" * ")
				logs.Log.App(" * [任務:%v | KEYIN:%v] 成功下載文件: %v (%s)\n",
					self.Spider.GetName(), self.Spider.GetKeyin(), fileName, bytes.Format(uint64(size)))
				logs.Log.Informational(" * ")
			}

			self.outCount[3]++
			// Recycle the FileCell.
			data.PutFileCell(file)
		default:
			runtime.Gosched()
		}
	}
}
示例7: init
// init registers the "mysql" output backend in the Output table.
func init() {
	// The handler writes the batch selected by dataIndex into MySQL, using one
	// back-quoted table "<namespace>__<subNamespace>" per sub-namespace.
	Output["mysql"] = func(self *Collector, dataIndex int) {
		conn, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc)
		if !ok || conn == nil {
			logs.Log.Error("鏈接Mysql數據庫超時,無法輸出!")
			return
		}
		defer mysql.MysqlPool.Free(conn)

		tables := make(map[string]*mysql.MyTable)
		namespace := util.FileNameReplace(self.namespace())

		for _, cell := range self.DockerQueue.Dockers[dataIndex] {
			sub := util.FileNameReplace(self.subNamespace(cell))

			// First cell of a sub-namespace: define and create its table.
			table, exists := tables[sub]
			if !exists {
				table = mysql.New(conn.DB)
				tables[sub] = table
				table.SetTableName("`" + namespace + "__" + sub + "`")
				for _, column := range self.GetRule(cell["RuleName"].(string)).GetOutFeild() {
					table.AddColumn(column)
				}
				table.
					AddColumn("Url", "ParentUrl", "DownloadTime").
					Create()
			}

			// Strings (and missing values) are stored as-is; anything else is
			// stored as its JSON text.
			for _, column := range self.GetRule(cell["RuleName"].(string)).GetOutFeild() {
				values := cell["Data"].(map[string]interface{})
				text, isString := values[column].(string)
				if !isString && values[column] != nil {
					text = util.JsonString(values[column])
				}
				table.AddRow(text)
			}

			table.
				AddRow(cell["Url"].(string), cell["ParentUrl"].(string), cell["DownloadTime"].(string)).
				Update()
		}
	}
}
示例8: SaveFile
// SaveFile drains self.FileChan and writes every received file cell below
// <config.COMM_PATH.FILE>/<namespace>__<start time>/.
//
// It keeps polling, yielding with runtime.Gosched while the channel is empty,
// until self.CtrlLen() is 0 and the channel holds no more cells. A download is
// only reported as successful when the directory could be created and the body
// was fully written; otherwise the error is logged instead.
func (self *Collector) SaveFile() {
	for !(self.CtrlLen() == 0 && len(self.FileChan) == 0) {
		select {
		case file := <-self.FileChan:
			self.outCount[2]++

			// Count the output file.
			// NOTE(review): this still runs before the write is attempted, so a
			// failed write is counted too — confirm what setFileSum is meant to track.
			self.setFileSum(1)

			// path.Split leaves p empty or ending in "/", so dir always ends with
			// a slash and the file name can be appended directly.
			p, n := path.Split(file["Name"].(string))
			dir := config.COMM_PATH.FILE + `/` + util.FileNameReplace(self.namespace()) + "__" + cache.StartTime.Format("2006年01月02日 15時04分05秒") + `/` + p
			fileName := dir + util.FileNameReplace(n)
			body := file["Body"].(io.ReadCloser)

			// MkdirAll does nothing when dir already exists as a directory.
			err := os.MkdirAll(dir, 0777)
			if err == nil {
				var f *os.File
				f, err = os.Create(fileName)
				if err == nil {
					_, err = io.Copy(f, body)
					// A failed Close can mean the data never reached the disk.
					if closeErr := f.Close(); err == nil {
						err = closeErr
					}
				}
			}
			body.Close()

			if err != nil {
				logs.Log.Error(" * [任務:%v | 關鍵詞:%v] 下載文件失敗: %v [ERROR] %v\n", self.Spider.GetName(), self.Spider.GetKeyword(), fileName, err)
			} else {
				// Print the report.
				logs.Log.Informational(" * ")
				logs.Log.Notice(" * [任務:%v | 關鍵詞:%v] 成功下載文件: %v \n", self.Spider.GetName(), self.Spider.GetKeyword(), fileName)
				logs.Log.Informational(" * ")
			}

			self.outCount[3]++
		default:
			runtime.Gosched()
		}
	}
}
示例9: init
/************************ excel output ***************************/

// init registers the "excel" output backend in the Output table.
func init() {
	// The handler builds one workbook for the batch selected by dataIndex, with
	// one sheet per rule that has output fields, and saves it below
	// "result/data/<start time>/".
	Output["excel"] = func(self *Collector, dataIndex int) {
		// A panic raised while building or saving the workbook is only logged.
		defer func() {
			if r := recover(); r != nil {
				Log.Println(r)
			}
		}()

		folder := "result/data" + "/" + self.startTime.Format("2006年01月02日 15時04分05秒")
		baseName := self.Spider.GetName() + "_" + self.Spider.GetKeyword() + " " + fmt.Sprintf("%v-%v", self.sum[0], self.sum[1])
		filename := folder + "/" + util.FileNameReplace(baseName) + ".xlsx"

		// Create the workbook.
		workbook := xlsx.NewFile()

		// Add one worksheet per rule.
		for ruleName, rule := range self.GetRules() {
			// Skip rules that output nothing.
			if len(rule.GetOutFeild()) == 0 {
				continue
			}

			sheet := workbook.AddSheet(util.ExcelSheetNameReplace(ruleName))

			// Header row: the rule's fields followed by the three default columns.
			header := sheet.AddRow()
			for _, title := range rule.GetOutFeild() {
				header.AddCell().Value = title
			}
			header.AddCell().Value = "當前鏈接"
			header.AddCell().Value = "上級鏈接"
			header.AddCell().Value = "下載時間"

			num := 0 // subtotal; only the disabled report below refers to it
			for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
				if datacell["RuleName"].(string) != ruleName {
					continue
				}
				line := sheet.AddRow()
				for _, title := range rule.GetOutFeild() {
					target := line.AddCell()
					values := datacell["Data"].(map[string]interface{})
					// Strings (and missing values) are written as-is; anything
					// else is written as its JSON text.
					text, isString := values[title].(string)
					if !isString && values[title] != nil {
						text = util.JsonString(values[title])
					}
					target.Value = text
				}
				line.AddCell().Value = datacell["Url"].(string)
				line.AddCell().Value = datacell["ParentUrl"].(string)
				line.AddCell().Value = datacell["DownloadTime"].(string)
				num++
			}
			// Disabled per-rule report:
			// Log.Printf("[任務:%v | 關鍵詞:%v | 小類:%v] 輸出 %v 條數據!!!\n", self.Spider.GetName(), self.Spider.GetKeyword(), Name, num)
		}

		// Create the output directory unless it already exists as a directory.
		if info, statErr := os.Stat(folder); statErr != nil || !info.IsDir() {
			if mkErr := os.MkdirAll(folder, 0777); mkErr != nil {
				Log.Printf("Error: %v\n", mkErr)
			}
		}

		// Save the workbook.
		if saveErr := workbook.Save(filename); saveErr != nil {
			Log.Println(saveErr)
		}
	}
}
示例10: init
// init registers the "mysql" data output backend in the DataOutput table.
func init() {
	// tableCache keeps the table definitions created so far, keyed by table
	// name and guarded by tableCacheLock; it is shared by every call of the
	// handler registered below.
	var (
		tableCache     = map[string]*mysql.MyTable{}
		tableCacheLock sync.RWMutex
	)

	// lookupTable returns a clone of the cached table for name, if any.
	lookupTable := func(name string) (*mysql.MyTable, bool) {
		tableCacheLock.RLock()
		defer tableCacheLock.RUnlock()
		if cached, found := tableCache[name]; found {
			return cached.Clone(), true
		}
		return nil, false
	}

	// storeTable remembers tab under name.
	storeTable := func(name string, tab *mysql.MyTable) {
		tableCacheLock.Lock()
		defer tableCacheLock.Unlock()
		tableCache[name] = tab
	}

	DataOutput["mysql"] = func(self *Collector) error {
		if _, err := mysql.DB(); err != nil {
			return fmt.Errorf("Mysql數據庫鏈接失敗: %v", err)
		}

		var (
			batchTables = make(map[string]*mysql.MyTable)
			namespace   = util.FileNameReplace(self.namespace())
		)

		for _, datacell := range self.dataDocker {
			sub := util.FileNameReplace(self.subNamespace(datacell))
			tName := joinNamespaces(namespace, sub)

			table, ok := batchTables[tName]
			if !ok {
				// Prefer a clone of a previously created definition; otherwise
				// define and create the table now.
				table, ok = lookupTable(tName)
				if !ok {
					table = mysql.New()
					table.SetTableName(tName)
					for _, title := range self.MustGetRule(datacell["RuleName"].(string)).ItemFields {
						table.AddColumn(title + ` MEDIUMTEXT`)
					}
					if self.Spider.OutDefaultField() {
						table.AddColumn(`Url VARCHAR(255)`, `ParentUrl VARCHAR(255)`, `DownloadTime VARCHAR(50)`)
					}
					if createErr := table.Create(); createErr != nil {
						logs.Log.Error("%v", createErr)
						continue
					}
					storeTable(tName, table)
				}
				batchTables[tName] = table
			}

			// Strings (and missing values) are stored as-is; anything else is
			// stored as its JSON text.
			row := []string{}
			for _, title := range self.MustGetRule(datacell["RuleName"].(string)).ItemFields {
				values := datacell["Data"].(map[string]interface{})
				text, isString := values[title].(string)
				if !isString && values[title] != nil {
					text = util.JsonString(values[title])
				}
				row = append(row, text)
			}
			if self.Spider.OutDefaultField() {
				row = append(row, datacell["Url"].(string), datacell["ParentUrl"].(string), datacell["DownloadTime"].(string))
			}
			table.AutoInsert(row)
		}

		// Flush whatever the tables used in this batch still hold.
		for _, tab := range batchTables {
			util.CheckErr(tab.FlushInsert())
		}
		return nil
	}
}
示例11: init
/************************ CSV output ***************************/

// init registers the "csv" output backend in the Output table.
func init() {
	// The handler writes the batch selected by dataIndex as CSV, one file per
	// sub-namespace, at
	// <config.COMM_PATH.TEXT>/<start time>/<namespace>__<subNamespace>/<sum[0]>-<sum[1]>.csv.
	// Every writer is flushed and its file closed when the handler returns.
	Output["csv"] = func(self *Collector, dataIndex int) {
		// A panic raised while writing is only logged.
		defer func() {
			if err := recover(); err != nil {
				logs.Log.Error("%v", err)
			}
		}()

		var namespace = util.FileNameReplace(self.namespace())
		var sheets = make(map[string]*csv.Writer)

		for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
			var subNamespace = util.FileNameReplace(self.subNamespace(datacell))

			if _, ok := sheets[subNamespace]; !ok {
				folder := config.COMM_PATH.TEXT + "/" + cache.StartTime.Format("2006年01月02日 15時04分05秒") + "/" + namespace + "__" + subNamespace
				filename := fmt.Sprintf("%v/%v-%v.csv", folder, self.sum[0], self.sum[1])

				// Create the directory unless it already exists as a directory.
				f, err := os.Stat(folder)
				if err != nil || !f.IsDir() {
					if err := os.MkdirAll(folder, 0777); err != nil {
						logs.Log.Error("Error: %v\n", err)
					}
				}

				// Create one file per data category.
				file, err := os.Create(filename)
				if err != nil {
					logs.Log.Error("%v", err)
					continue
				}
				file.WriteString("\xEF\xBB\xBF") // write the UTF-8 BOM

				sheets[subNamespace] = csv.NewWriter(file)

				// Build the header in a fresh slice: appending straight onto
				// ItemFields could write into the rule's own backing array.
				fields := self.MustGetRule(datacell["RuleName"].(string)).ItemFields
				th := make([]string, 0, len(fields)+3)
				th = append(th, fields...)
				th = append(th, "當前鏈接", "上級鏈接", "下載時間")
				sheets[subNamespace].Write(th)

				// Bind the writer and file as arguments so the deferred cleanup
				// does not depend on variables captured from the loop.
				defer func(w *csv.Writer, f *os.File) {
					// Flush the buffered rows.
					w.Flush()
					// Close the file.
					f.Close()
				}(sheets[subNamespace], file)
			}

			// Strings (and missing values) are written as-is; anything else is
			// written as its JSON text.
			row := []string{}
			for _, title := range self.MustGetRule(datacell["RuleName"].(string)).ItemFields {
				vd := datacell["Data"].(map[string]interface{})
				if v, ok := vd[title].(string); ok || vd[title] == nil {
					row = append(row, v)
				} else {
					row = append(row, util.JsonString(vd[title]))
				}
			}
			row = append(row, datacell["Url"].(string))
			row = append(row, datacell["ParentUrl"].(string))
			row = append(row, datacell["DownloadTime"].(string))
			sheets[subNamespace].Write(row)
		}
	}
}
示例12: init
/************************ CSV output ***************************/

// init registers the "csv" output backend in the Output table.
func init() {
	// The handler writes the batch selected by dataIndex as CSV below
	// "result/data/<start time>/", one file per rule that has output fields.
	Output["csv"] = func(self *Collector, dataIndex int) {
		// A panic raised while writing is only logged.
		defer func() {
			if err := recover(); err != nil {
				Log.Println(err)
			}
		}()

		folder1 := "result/data"
		folder2 := folder1 + "/" + self.startTime.Format("2006年01月02日 15時04分05秒")
		filenameBase := folder2 + "/" + util.FileNameReplace(self.Spider.GetName()+"_"+self.Spider.GetKeyword()+" "+fmt.Sprintf("%v", self.sum[0])+"-"+fmt.Sprintf("%v", self.sum[1]))

		// Create the directory unless it already exists as a directory.
		f2, err := os.Stat(folder2)
		if err != nil || !f2.IsDir() {
			if err := os.MkdirAll(folder2, 0777); err != nil {
				Log.Printf("Error: %v\n", err)
			}
		}

		// Create one file per data category.
		for Name, Rule := range self.GetRules() {
			// Skip rules that output nothing.
			if len(Rule.GetOutFeild()) == 0 {
				continue
			}

			file, err := os.Create(filenameBase + " (" + util.FileNameReplace(Name) + ").csv")
			if err != nil {
				Log.Println(err)
				continue
			}
			file.WriteString("\xEF\xBB\xBF") // write the UTF-8 BOM

			w := csv.NewWriter(file)

			// Build the header in a fresh slice: appending straight onto the
			// slice returned by GetOutFeild could write into the rule's own
			// backing array.
			fields := Rule.GetOutFeild()
			th := make([]string, 0, len(fields)+3)
			th = append(th, fields...)
			th = append(th, "當前鏈接", "上級鏈接", "下載時間")
			w.Write(th)

			num := 0 // subtotal; only the disabled report below refers to it
			for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
				if datacell["RuleName"].(string) == Name {
					// Strings (and missing values) are written as-is; anything
					// else is written as its JSON text.
					row := []string{}
					for _, title := range Rule.GetOutFeild() {
						vd := datacell["Data"].(map[string]interface{})
						if v, ok := vd[title].(string); ok || vd[title] == nil {
							row = append(row, v)
						} else {
							row = append(row, util.JsonString(vd[title]))
						}
					}
					row = append(row, datacell["Url"].(string))
					row = append(row, datacell["ParentUrl"].(string))
					row = append(row, datacell["DownloadTime"].(string))
					w.Write(row)
					num++
				}
			}

			// Flush the buffered rows.
			w.Flush()
			// Close the file.
			file.Close()

			// Disabled per-rule report:
			// Log.Printf("[任務:%v | 關鍵詞:%v | 小類:%v] 輸出 %v 條數據!!!\n", self.Spider.GetName(), self.Spider.GetKeyword(), Name, num)
		}
	}
}
示例13: init
func init() {
defer func() {
// 獲取輸出方式列表
for out, _ := range Output {
OutputLib = append(OutputLib, out)
}
util.StringsSort(OutputLib)
}()
/************************ excel 輸出 ***************************/
Output["excel"] = func(self *Collector, dataIndex int) {
defer func() {
if err := recover(); err != nil {
Log.Println(err)
}
}()
var file *xlsx.File
var sheet *xlsx.Sheet
var row *xlsx.Row
var cell *xlsx.Cell
var err error
folder1 := "result/data"
folder2 := folder1 + "/" + self.startTime.Format("2006年01月02日 15時04分05秒")
filename := folder2 + "/" + util.FileNameReplace(self.Spider.GetName()+"_"+self.Spider.GetKeyword()+" "+fmt.Sprintf("%v", self.sum[0])+"-"+fmt.Sprintf("%v", self.sum[1])) + ".xlsx"
// 創建文件
file = xlsx.NewFile()
// 添加分類數據工作表
for Name, Rule := range self.GetRules() {
// 跳過不輸出的數據
if len(Rule.GetOutFeild()) == 0 {
continue
}
// 添加工作表
sheet = file.AddSheet(util.ExcelSheetNameReplace(Name))
// 寫入表頭
row = sheet.AddRow()
for _, title := range Rule.GetOutFeild() {
cell = row.AddCell()
cell.Value = title
}
cell = row.AddCell()
cell.Value = "當前鏈接"
cell = row.AddCell()
cell.Value = "上級鏈接"
cell = row.AddCell()
cell.Value = "下載時間"
num := 0 //小計
for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
if datacell["RuleName"].(string) == Name {
row = sheet.AddRow()
for _, title := range Rule.GetOutFeild() {
cell = row.AddCell()
vd := datacell["Data"].(map[string]interface{})
if v, ok := vd[title].(string); ok || vd[title] == nil {
cell.Value = v
} else {
cell.Value = util.JsonString(vd[title])
}
}
cell = row.AddCell()
cell.Value = datacell["Url"].(string)
cell = row.AddCell()
cell.Value = datacell["ParentUrl"].(string)
cell = row.AddCell()
cell.Value = datacell["DownloadTime"].(string)
num++
}
}
// Log.Printf("[任務:%v | 關鍵詞:%v | 小類:%v] 輸出 %v 條數據!!!\n", self.Spider.GetName(), self.Spider.GetKeyword(), Name, num)
}
// 創建/打開目錄
f2, err := os.Stat(folder2)
if err != nil || !f2.IsDir() {
if err := os.MkdirAll(folder2, 0777); err != nil {
Log.Printf("Error: %v\n", err)
}
}
// 保存文件
err = file.Save(filename)
if err != nil {
Log.Println(err)
}
}
/************************ CSV 輸出 ***************************/
Output["csv"] = func(self *Collector, dataIndex int) {
defer func() {
if err := recover(); err != nil {
Log.Println(err)
//.........這裏部分代碼省略.........
示例14: init
/************************ excel output ***************************/

// init registers the "excel" output backend in the Output table.
func init() {
	// The handler builds one workbook for the batch selected by dataIndex, with
	// one sheet per sub-namespace, and saves it as
	// <config.TEXT_DIR>/<start time>/<namespace>__<sum[0]>-<sum[1]>.xlsx.
	// It returns the error from saving, or a panic converted into an error.
	Output["excel"] = func(self *Collector, dataIndex int) (err error) {
		// Turn a panic raised anywhere below into the returned error.
		defer func() {
			if r := recover(); r != nil {
				err = fmt.Errorf("%v", r)
			}
		}()

		sheets := make(map[string]*xlsx.Sheet)

		// Create the workbook.
		workbook := xlsx.NewFile()

		for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
			sub := util.FileNameReplace(self.subNamespace(datacell))

			sheet, known := sheets[sub]
			if !known {
				// Add the worksheet; a cell whose sheet cannot be created is skipped.
				created, addErr := workbook.AddSheet(sub)
				if addErr != nil {
					logs.Log.Error("%v", addErr)
					continue
				}
				sheet = created
				sheets[sub] = sheet

				// Header row: the rule's fields, plus the default columns if wanted.
				header := sheet.AddRow()
				for _, title := range self.MustGetRule(datacell["RuleName"].(string)).ItemFields {
					header.AddCell().Value = title
				}
				if self.Spider.OutDefaultField() {
					header.AddCell().Value = "當前鏈接"
					header.AddCell().Value = "上級鏈接"
					header.AddCell().Value = "下載時間"
				}
			}

			line := sheet.AddRow()
			for _, title := range self.MustGetRule(datacell["RuleName"].(string)).ItemFields {
				target := line.AddCell()
				values := datacell["Data"].(map[string]interface{})
				// Strings (and missing values) are written as-is; anything else
				// is written as its JSON text.
				text, isString := values[title].(string)
				if !isString && values[title] != nil {
					text = util.JsonString(values[title])
				}
				target.Value = text
			}
			if self.Spider.OutDefaultField() {
				line.AddCell().Value = datacell["Url"].(string)
				line.AddCell().Value = datacell["ParentUrl"].(string)
				line.AddCell().Value = datacell["DownloadTime"].(string)
			}
		}

		folder := config.TEXT_DIR + "/" + cache.StartTime.Format("2006年01月02日 15時04分05秒")
		filename := fmt.Sprintf("%v/%v__%v-%v.xlsx", folder, util.FileNameReplace(self.namespace()), self.sum[0], self.sum[1])

		// Create the directory unless it already exists as a directory.
		if info, statErr := os.Stat(folder); statErr != nil || !info.IsDir() {
			if mkErr := os.MkdirAll(folder, 0777); mkErr != nil {
				logs.Log.Error("Error: %v\n", mkErr)
			}
		}

		// Save the workbook.
		err = workbook.Save(filename)
		return err
	}
}
示例15: init
/************************ Kafka output ***************************/

// init registers the "kafka" data output backend in the DataOutput table.
func init() {
	// senderCache keeps the senders created so far, keyed by topic name and
	// guarded by senderCacheLock; it is shared by every call of the handler
	// registered below.
	var (
		senderCache     = map[string]*kafka.KafkaSender{}
		senderCacheLock sync.RWMutex
	)

	// lookupSender returns the cached sender for name, if any.
	lookupSender := func(name string) (*kafka.KafkaSender, bool) {
		senderCacheLock.RLock()
		defer senderCacheLock.RUnlock()
		cached, found := senderCache[name]
		return cached, found
	}

	// storeSender remembers sender under name.
	storeSender := func(name string, sender *kafka.KafkaSender) {
		senderCacheLock.Lock()
		defer senderCacheLock.Unlock()
		senderCache[name] = sender
	}

	DataOutput["kafka"] = func(self *Collector) error {
		if _, err := kafka.GetProducer(); err != nil {
			return fmt.Errorf("kafka producer失敗: %v", err)
		}

		var (
			batchSenders = make(map[string]*kafka.KafkaSender)
			namespace    = util.FileNameReplace(self.namespace())
		)

		for _, datacell := range self.dataDocker {
			sub := util.FileNameReplace(self.subNamespace(datacell))
			topicName := joinNamespaces(namespace, sub)

			sender, ok := batchSenders[topicName]
			if !ok {
				// Reuse a cached sender for the topic or create and cache one.
				if sender, ok = lookupSender(topicName); !ok {
					sender = kafka.New()
					sender.SetTopic(topicName)
					storeSender(topicName, sender)
				}
				batchSenders[topicName] = sender
			}

			// Strings (and missing values) are sent as-is; anything else is
			// sent as its JSON text.
			message := make(map[string]interface{})
			for _, title := range self.MustGetRule(datacell["RuleName"].(string)).ItemFields {
				values := datacell["Data"].(map[string]interface{})
				text, isString := values[title].(string)
				if !isString && values[title] != nil {
					text = util.JsonString(values[title])
				}
				message[title] = text
			}
			if self.Spider.OutDefaultField() {
				message["url"] = datacell["Url"].(string)
				message["parent_url"] = datacell["ParentUrl"].(string)
				message["download_time"] = datacell["DownloadTime"].(string)
			}

			pushErr := sender.Push(message)
			util.CheckErr(pushErr)
		}
		return nil
	}
}