當前位置: 首頁>>代碼示例>>Golang>>正文


Golang util.FileNameReplace函數代碼示例

本文整理匯總了Golang中github.com/henrylee2cn/pholcus/common/util.FileNameReplace函數的典型用法代碼示例。如果您正苦於以下問題:Golang FileNameReplace函數的具體用法?Golang FileNameReplace怎麼用?Golang FileNameReplace使用的例子?那麼, 這裏精選的函數代碼示例或許可以為您提供幫助。


在下文中一共展示了FileNameReplace函數的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Golang代碼示例。

示例1: init

// init registers the "mgo" output backend, which writes one batch of
// collected data cells into MongoDB collections.
func init() {
	Output["mgo"] = func(self *Collector, dataIndex int) {
		// Borrow a pooled session and give it back once the batch is written.
		session := mgo.MgoPool.GetOne().(*mgo.MgoSrc)
		defer mgo.MgoPool.Free(session)

		database := session.DB(config.MGO.DB)
		prefix := util.FileNameReplace(self.namespace())
		targets := map[string]*mgov2.Collection{}
		pending := map[string][]interface{}{}

		for _, cell := range self.DockerQueue.Dockers[dataIndex] {
			sub := util.FileNameReplace(self.subNamespace(cell))
			if _, seen := targets[sub]; !seen {
				// Collections are named "<namespace>__<subNamespace>".
				targets[sub] = database.C(prefix + "__" + sub)
			}

			// Lift the nested "Data" fields to the top level of the document,
			// then drop the two bookkeeping keys before queuing it.
			fields := cell["Data"].(map[string]interface{})
			for key, value := range fields {
				cell[key] = value
			}
			delete(cell, "Data")
			delete(cell, "RuleName")

			pending[sub] = append(pending[sub], cell)
		}

		// One Insert call per collection; failures are logged, not returned.
		for sub, docs := range pending {
			if err := targets[sub].Insert(docs...); err != nil {
				logs.Log.Error("%v", err)
			}
		}
	}
}
開發者ID:nathena,項目名稱:pholcus,代碼行數:33,代碼來源:output_mgo.go

示例2: New

// New builds a Historier for the given name and optional subName.
//
// Every table and file name has the form "<prefix>__<name>", with
// "__<subName>" appended when subName is non-empty. Only the table names are
// passed through util.FileNameReplace; the file names are used as built.
func New(name string, subName string) Historier {
	// Tail shared by all four identifiers.
	tail := "__" + name
	if subName != "" {
		tail += "__" + subName
	}

	success := &Success{
		tabName:  util.FileNameReplace(SUCCESS_SUFFIX + tail),
		fileName: SUCCESS_FILE + tail,
		new:      make(map[string]bool),
		old:      make(map[string]bool),
	}
	failure := &Failure{
		tabName:  util.FileNameReplace(FAILURE_SUFFIX + tail),
		fileName: FAILURE_FILE + tail,
		list:     make(map[string]*request.Request),
	}
	return &History{Success: success, Failure: failure}
}
開發者ID:henrylee2cn,項目名稱:pholcus,代碼行數:25,代碼來源:history.go

示例3: init

// init registers the "mgo" output backend, which stores one batch of
// collected data cells in MongoDB.
func init() {
	// The registered function returns an error when mgo.Error reports one or
	// when mgo.Call returns one. Individual insert failures are only logged.
	Output["mgo"] = func(self *Collector, dataIndex int) error {
		// Read the connection state once so the check and the message agree.
		if err := mgo.Error(); err != nil {
			return fmt.Errorf("MongoDB數據庫鏈接失敗: %v", err)
		}
		return mgo.Call(func(src pool.Src) error {
			var (
				db          = src.(*mgo.MgoSrc).DB(config.DB_NAME)
				namespace   = util.FileNameReplace(self.namespace())
				collections = make(map[string]*mgov2.Collection)
				dataMap     = make(map[string][]interface{})
			)

			for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
				subNamespace := util.FileNameReplace(self.subNamespace(datacell))
				// Collection name is the namespace, plus "__<subNamespace>"
				// when a sub-namespace is present.
				cName := namespace
				if subNamespace != "" {
					cName += "__" + subNamespace
				}
				if _, ok := collections[subNamespace]; !ok {
					collections[subNamespace] = db.C(cName)
				}

				// Lift the nested "Data" fields to the top level of the document.
				for k, v := range datacell["Data"].(map[string]interface{}) {
					datacell[k] = v
				}
				delete(datacell, "Data")
				delete(datacell, "RuleName")
				if !self.Spider.OutDefaultField() {
					delete(datacell, "Url")
					delete(datacell, "ParentUrl")
					delete(datacell, "DownloadTime")
				}
				dataMap[subNamespace] = append(dataMap[subNamespace], datacell)
			}

			for collection, docs := range dataMap {
				c := collections[collection]
				count := len(docs)
				// Insert in full chunks of mgo.MaxLen documents first...
				loop := count / mgo.MaxLen
				for i := 0; i < loop; i++ {
					if err := c.Insert(docs[i*mgo.MaxLen : (i+1)*mgo.MaxLen]...); err != nil {
						logs.Log.Error("%v", err)
					}
				}
				// ...then the remainder, if there is one.
				if count%mgo.MaxLen == 0 {
					continue
				}
				if err := c.Insert(docs[loop*mgo.MaxLen:]...); err != nil {
					logs.Log.Error("%v", err)
				}
			}

			return nil
		})
	}
}
開發者ID:clock145,項目名稱:pholcus,代碼行數:60,代碼來源:output_mgo.go

示例4: outputFile

// outputFile writes one downloaded file cell to disk and logs the outcome.
//
// The target path is config.FILE_DIR/<namespace>/<dir of Name>/<base of Name>,
// where the namespace and the base name are passed through
// util.FileNameReplace. The cell is handed back via data.PutFileCell and
// self.wait.Done is called when the method returns, on success or failure.
func (self *Collector) outputFile(file data.FileCell) {
	// Recycle the FileCell and signal completion on every exit path.
	defer func() {
		data.PutFileCell(file)
		self.wait.Done()
	}()

	p, n := filepath.Split(filepath.Clean(file["Name"].(string)))
	dir := filepath.Join(config.FILE_DIR, util.FileNameReplace(self.namespace()), p)
	fileName := filepath.Join(dir, util.FileNameReplace(n))

	// MkdirAll returns nil when dir already exists as a directory, so no
	// separate Stat check is needed.
	if err := os.MkdirAll(dir, 0777); err != nil {
		logs.Log.Error(
			" *     Fail  [文件下載:%v | KEYIN:%v | 批次:%v]   %v [ERROR]  %v\n",
			self.Spider.GetName(), self.Spider.GetKeyin(), atomic.LoadUint64(&self.fileBatch), fileName, err,
		)
		return
	}

	// Create the file with mode 0777 if it is missing; truncate it otherwise.
	f, err := os.OpenFile(fileName, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0777)
	if err != nil {
		logs.Log.Error(
			" *     Fail  [文件下載:%v | KEYIN:%v | 批次:%v]   %v [ERROR]  %v\n",
			self.Spider.GetName(), self.Spider.GetKeyin(), atomic.LoadUint64(&self.fileBatch), fileName, err,
		)
		return
	}

	size, err := io.Copy(f, bytes.NewReader(file["Bytes"].([]byte)))
	// A failed Close can mean the data never reached the disk, so treat it
	// as a write failure unless the copy already failed.
	if closeErr := f.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		logs.Log.Error(
			" *     Fail  [文件下載:%v | KEYIN:%v | 批次:%v]   %v (%s) [ERROR]  %v\n",
			self.Spider.GetName(), self.Spider.GetKeyin(), atomic.LoadUint64(&self.fileBatch), fileName, bytesSize.Format(uint64(size)), err,
		)
		return
	}

	// Count the file only after it has been written successfully.
	self.addFileSum(1)

	// Print the report.
	logs.Log.Informational(" * ")
	logs.Log.App(
		" *     [文件下載:%v | KEYIN:%v | 批次:%v]   %v (%s)\n",
		self.Spider.GetName(), self.Spider.GetKeyin(), atomic.LoadUint64(&self.fileBatch), fileName, bytesSize.Format(uint64(size)),
	)
	logs.Log.Informational(" * ")
}
開發者ID:henrylee2cn,項目名稱:pholcus,代碼行數:59,代碼來源:output_file.go

示例5: init

// init registers the "mysql" output backend, which writes one batch of
// collected data cells into MySQL tables, one table per sub-namespace.
func init() {
	Output["mysql"] = func(self *Collector, dataIndex int) {
		conn, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc)
		if !ok || conn == nil {
			logs.Log.Error("鏈接Mysql數據庫超時,無法輸出!")
			return
		}
		defer mysql.MysqlPool.Free(conn)

		tables := map[string]*mysql.MyTable{}
		prefix := util.FileNameReplace(self.namespace())

		for _, cell := range self.DockerQueue.Dockers[dataIndex] {
			sub := util.FileNameReplace(self.subNamespace(cell))

			// Table name is the namespace, plus "__<subNamespace>" when present.
			tableName := prefix
			if sub != "" {
				tableName += "__" + sub
			}

			// First cell for this sub-namespace: define and create the table.
			if _, known := tables[sub]; !known {
				tables[sub] = mysql.New(conn.DB)
				tables[sub].SetTableName(tableName)
				for _, title := range self.MustGetRule(cell["RuleName"].(string)).ItemFields {
					tables[sub].AddColumn(title + ` MEDIUMTEXT`)
				}
				tables[sub].
					AddColumn(`Url VARCHAR(255)`, `ParentUrl VARCHAR(255)`, `DownloadTime VARCHAR(50)`).
					Create()
			}
			table := tables[sub]

			// Strings are stored as-is, missing/nil values as "", and anything
			// else as its JSON text.
			for _, title := range self.MustGetRule(cell["RuleName"].(string)).ItemFields {
				fields := cell["Data"].(map[string]interface{})
				switch val := fields[title].(type) {
				case nil:
					table.AddRow("")
				case string:
					table.AddRow(val)
				default:
					table.AddRow(util.JsonString(val))
				}
			}

			updateErr := table.
				AddRow(cell["Url"].(string), cell["ParentUrl"].(string), cell["DownloadTime"].(string)).
				Update()
			util.CheckErr(updateErr)
		}
	}
}
開發者ID:Cdim,項目名稱:pholcus,代碼行數:46,代碼來源:output_mysql.go

示例6: SaveFile

// SaveFile drains self.FileChan and writes each received file to disk. It
// keeps looping until self.CtrlLen() is zero and the channel is empty.
//
// Each file is stored at config.FILE_DIR/<namespace>/<dir of Name>/<base of
// Name>. A file is counted with addFileSum and reported as downloaded only
// when the directory, open, copy and close steps all succeed; otherwise the
// first error is logged.
func (self *Collector) SaveFile() {
	for !(self.CtrlLen() == 0 && len(self.FileChan) == 0) {
		select {
		case file := <-self.FileChan:
			self.outCount[2]++

			p, n := filepath.Split(filepath.Clean(file["Name"].(string)))
			dir := filepath.Join(config.FILE_DIR, util.FileNameReplace(self.namespace()), p)
			fileName := filepath.Join(dir, util.FileNameReplace(n))
			body := file["Body"].(io.ReadCloser)

			// MkdirAll returns nil when dir already exists as a directory.
			var size int64
			err := os.MkdirAll(dir, 0777)
			if err == nil {
				// Create the file with mode 0777 if missing; truncate it otherwise.
				var f *os.File
				f, err = os.OpenFile(fileName, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0777)
				if err == nil {
					size, err = io.Copy(f, body)
					// A failed Close may mean the data never reached the disk.
					if closeErr := f.Close(); err == nil {
						err = closeErr
					}
				}
			}
			// The body is always released; its Close error carries no
			// information about the written file.
			body.Close()

			if err != nil {
				logs.Log.Error("Error: %v\n", err)
			} else {
				// Output statistics.
				self.addFileSum(1)

				// Print the report.
				logs.Log.Informational(" * ")
				logs.Log.App(" *     [任務:%v | KEYIN:%v]   成功下載文件: %v (%s)\n",
					self.Spider.GetName(), self.Spider.GetKeyin(), fileName, bytes.Format(uint64(size)))
				logs.Log.Informational(" * ")
			}

			self.outCount[3]++

			// Recycle the FileCell.
			data.PutFileCell(file)
		default:
			runtime.Gosched()
		}
	}
}
開發者ID:ReinhardHsu,項目名稱:pholcus,代碼行數:46,代碼來源:savefile.go

示例7: init

// init registers the "mysql" output backend, which writes one batch of
// collected data cells into MySQL tables, one table per sub-namespace.
func init() {
	Output["mysql"] = func(self *Collector, dataIndex int) {
		conn, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc)
		if !ok || conn == nil {
			logs.Log.Error("鏈接Mysql數據庫超時,無法輸出!")
			return
		}
		defer mysql.MysqlPool.Free(conn)

		tables := map[string]*mysql.MyTable{}
		prefix := util.FileNameReplace(self.namespace())

		for _, cell := range self.DockerQueue.Dockers[dataIndex] {
			sub := util.FileNameReplace(self.subNamespace(cell))

			// First cell for this sub-namespace: define and create the table,
			// named "<namespace>__<subNamespace>" and wrapped in backticks.
			if _, known := tables[sub]; !known {
				tables[sub] = mysql.New(conn.DB)
				tables[sub].SetTableName("`" + prefix + "__" + sub + "`")
				for _, title := range self.GetRule(cell["RuleName"].(string)).GetOutFeild() {
					tables[sub].AddColumn(title)
				}
				tables[sub].
					AddColumn("Url", "ParentUrl", "DownloadTime").
					Create()
			}
			table := tables[sub]

			// Strings are stored as-is, missing/nil values as "", and anything
			// else as its JSON text.
			for _, title := range self.GetRule(cell["RuleName"].(string)).GetOutFeild() {
				fields := cell["Data"].(map[string]interface{})
				switch val := fields[title].(type) {
				case nil:
					table.AddRow("")
				case string:
					table.AddRow(val)
				default:
					table.AddRow(util.JsonString(val))
				}
			}

			table.
				AddRow(cell["Url"].(string), cell["ParentUrl"].(string), cell["DownloadTime"].(string)).
				Update()
		}
	}
}
開發者ID:BobbWu,項目名稱:pholcus,代碼行數:41,代碼來源:output_mysql.go

示例8: SaveFile

// SaveFile drains self.FileChan and writes each received file to disk. It
// keeps looping until self.CtrlLen() is zero and the channel is empty.
//
// Files go under config.COMM_PATH.FILE/<namespace>__<start time>/<dir of
// Name>/. The success notice is printed only when the directory, create,
// copy and close steps all succeed; otherwise the first error is logged.
func (self *Collector) SaveFile() {
	for !(self.CtrlLen() == 0 && len(self.FileChan) == 0) {
		select {
		case file := <-self.FileChan:
			self.outCount[2]++

			// Count the output file.
			self.setFileSum(1)

			// p keeps its trailing slash (or is empty), so dir always ends in "/".
			p, n := path.Split(file["Name"].(string))
			dir := config.COMM_PATH.FILE + `/` + util.FileNameReplace(self.namespace()) + "__" + cache.StartTime.Format("2006年01月02日 15時04分05秒") + `/` + p
			fileName := dir + util.FileNameReplace(n)
			body := file["Body"].(io.ReadCloser)

			// MkdirAll returns nil when dir already exists as a directory.
			err := os.MkdirAll(dir, 0777)
			if err == nil {
				var f *os.File
				f, err = os.Create(fileName)
				if err == nil {
					_, err = io.Copy(f, body)
					// A failed Close may mean the data never reached the disk.
					if closeErr := f.Close(); err == nil {
						err = closeErr
					}
				}
			}
			// The body is always released; its Close error carries no
			// information about the written file.
			body.Close()

			if err != nil {
				logs.Log.Error("Error: %v\n", err)
			} else {
				// Print the report.
				logs.Log.Informational(" * ")
				logs.Log.Notice(" *     [任務:%v | 關鍵詞:%v]   成功下載文件: %v \n", self.Spider.GetName(), self.Spider.GetKeyword(), fileName)
				logs.Log.Informational(" * ")
			}

			self.outCount[3]++
		default:
			runtime.Gosched()
		}
	}
}
開發者ID:Cdim,項目名稱:pholcus,代碼行數:40,代碼來源:savefile.go

示例9: init

/************************ Excel output ***************************/

// init registers the "excel" output backend. It builds one workbook per
// batch, with one sheet per rule that has output fields, and saves it under
// result/data/<start time>/.
func init() {
	Output["excel"] = func(self *Collector, dataIndex int) {
		// Any panic below (for example a failed type assertion) is logged
		// instead of being propagated.
		defer func() {
			if r := recover(); r != nil {
				Log.Println(r)
			}
		}()

		outDir := "result/data" + "/" + self.startTime.Format("2006年01月02日 15時04分05秒")
		baseName := fmt.Sprintf("%s_%s %v-%v", self.Spider.GetName(), self.Spider.GetKeyword(), self.sum[0], self.sum[1])
		target := outDir + "/" + util.FileNameReplace(baseName) + ".xlsx"

		// Create the workbook.
		workbook := xlsx.NewFile()

		// Add one sheet per rule.
		for ruleName, rule := range self.GetRules() {
			// Skip rules that output nothing.
			if len(rule.GetOutFeild()) == 0 {
				continue
			}

			sheet := workbook.AddSheet(util.ExcelSheetNameReplace(ruleName))

			// Header row: the rule's fields followed by the three default columns.
			header := sheet.AddRow()
			for _, title := range rule.GetOutFeild() {
				header.AddCell().Value = title
			}
			header.AddCell().Value = "當前鏈接"
			header.AddCell().Value = "上級鏈接"
			header.AddCell().Value = "下載時間"

			// Data rows: only the cells produced by this rule.
			for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
				if datacell["RuleName"].(string) != ruleName {
					continue
				}
				line := sheet.AddRow()
				for _, title := range rule.GetOutFeild() {
					c := line.AddCell()
					fields := datacell["Data"].(map[string]interface{})
					// Strings are written as-is, missing/nil values as "",
					// and anything else as its JSON text.
					switch val := fields[title].(type) {
					case nil:
						c.Value = ""
					case string:
						c.Value = val
					default:
						c.Value = util.JsonString(val)
					}
				}
				line.AddCell().Value = datacell["Url"].(string)
				line.AddCell().Value = datacell["ParentUrl"].(string)
				line.AddCell().Value = datacell["DownloadTime"].(string)
			}
		}

		// Create the output directory unless it already exists as a directory.
		if info, statErr := os.Stat(outDir); statErr != nil || !info.IsDir() {
			if mkErr := os.MkdirAll(outDir, 0777); mkErr != nil {
				Log.Printf("Error: %v\n", mkErr)
			}
		}

		// Save the workbook.
		if saveErr := workbook.Save(target); saveErr != nil {
			Log.Println(saveErr)
		}
	}
}
開發者ID:rorovic,項目名稱:pholcus,代碼行數:87,代碼來源:output_excel.go

示例10: init

// init registers the "mysql" data output backend. Table definitions that were
// created successfully are cached in a map shared by every call of the
// registered function and guarded by a read/write mutex.
func init() {
	var (
		tableCache   = map[string]*mysql.MyTable{}
		tableCacheMu sync.RWMutex
	)

	// lookupTable returns a clone of the cached table for name, if there is one.
	lookupTable := func(name string) (*mysql.MyTable, bool) {
		tableCacheMu.RLock()
		defer tableCacheMu.RUnlock()
		if cached, found := tableCache[name]; found {
			return cached.Clone(), true
		}
		return nil, false
	}

	// storeTable records tab in the cache under name.
	storeTable := func(name string, tab *mysql.MyTable) {
		tableCacheMu.Lock()
		defer tableCacheMu.Unlock()
		tableCache[name] = tab
	}

	DataOutput["mysql"] = func(self *Collector) error {
		if _, err := mysql.DB(); err != nil {
			return fmt.Errorf("Mysql數據庫鏈接失敗: %v", err)
		}

		// Tables touched by this call, keyed by table name.
		batch := make(map[string]*mysql.MyTable)
		namespace := util.FileNameReplace(self.namespace())

		for _, cell := range self.dataDocker {
			subNamespace := util.FileNameReplace(self.subNamespace(cell))
			tName := joinNamespaces(namespace, subNamespace)

			table, known := batch[tName]
			if !known {
				if cached, hit := lookupTable(tName); hit {
					table = cached
					batch[tName] = table
				} else {
					// Not seen before: define the table and create it.
					table = mysql.New()
					table.SetTableName(tName)
					for _, title := range self.MustGetRule(cell["RuleName"].(string)).ItemFields {
						table.AddColumn(title + ` MEDIUMTEXT`)
					}
					if self.Spider.OutDefaultField() {
						table.AddColumn(`Url VARCHAR(255)`, `ParentUrl VARCHAR(255)`, `DownloadTime VARCHAR(50)`)
					}
					if err := table.Create(); err != nil {
						// Creation failed: log it and skip this cell.
						logs.Log.Error("%v", err)
						continue
					}
					storeTable(tName, table)
					batch[tName] = table
				}
			}

			// Build the row: strings as-is, missing/nil values as "", and
			// anything else as its JSON text.
			row := []string{}
			for _, title := range self.MustGetRule(cell["RuleName"].(string)).ItemFields {
				fields := cell["Data"].(map[string]interface{})
				switch val := fields[title].(type) {
				case nil:
					row = append(row, "")
				case string:
					row = append(row, val)
				default:
					row = append(row, util.JsonString(val))
				}
			}
			if self.Spider.OutDefaultField() {
				row = append(row, cell["Url"].(string), cell["ParentUrl"].(string), cell["DownloadTime"].(string))
			}
			table.AutoInsert(row)
		}

		// Flush whatever each table still has queued.
		for _, tab := range batch {
			util.CheckErr(tab.FlushInsert())
		}
		return nil
	}
}
開發者ID:henrylee2cn,項目名稱:pholcus,代碼行數:78,代碼來源:output_mysql.go

示例11: init

/************************ CSV output ***************************/

// init registers the "csv" output backend. Each sub-namespace in the batch
// gets its own CSV file, created on first use at
// config.COMM_PATH.TEXT/<start time>/<namespace>__<subNamespace>/<sum0>-<sum1>.csv
// and flushed and closed when the registered function returns.
func init() {
	Output["csv"] = func(self *Collector, dataIndex int) {
		// Any panic below (for example a failed type assertion) is logged
		// instead of being propagated.
		defer func() {
			if err := recover(); err != nil {
				logs.Log.Error("%v", err)
			}
		}()

		namespace := util.FileNameReplace(self.namespace())
		sheets := make(map[string]*csv.Writer)

		for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
			subNamespace := util.FileNameReplace(self.subNamespace(datacell))
			if _, ok := sheets[subNamespace]; !ok {
				folder := config.COMM_PATH.TEXT + "/" + cache.StartTime.Format("2006年01月02日 15時04分05秒") + "/" + namespace + "__" + subNamespace
				filename := fmt.Sprintf("%v/%v-%v.csv", folder, self.sum[0], self.sum[1])

				// MkdirAll returns nil when folder already exists as a directory.
				if err := os.MkdirAll(folder, 0777); err != nil {
					logs.Log.Error("Error: %v\n", err)
				}

				// One file per data category.
				file, err := os.Create(filename)
				if err != nil {
					logs.Log.Error("%v", err)
					continue
				}

				file.WriteString("\xEF\xBB\xBF") // write the UTF-8 BOM

				w := csv.NewWriter(file)
				sheets[subNamespace] = w

				// Build the header in a fresh slice. Appending directly to
				// ItemFields could write into the rule's own backing array.
				fields := self.MustGetRule(datacell["RuleName"].(string)).ItemFields
				th := make([]string, 0, len(fields)+3)
				th = append(th, fields...)
				th = append(th, "當前鏈接", "上級鏈接", "下載時間")
				w.Write(th)

				// Runs when the output function returns: flush buffered rows,
				// then close the file.
				defer func(w *csv.Writer, file *os.File) {
					w.Flush()
					file.Close()
				}(w, file)
			}

			// Strings are written as-is, missing/nil values as "", and
			// anything else as its JSON text.
			row := []string{}
			for _, title := range self.MustGetRule(datacell["RuleName"].(string)).ItemFields {
				vd := datacell["Data"].(map[string]interface{})
				if v, ok := vd[title].(string); ok || vd[title] == nil {
					row = append(row, v)
				} else {
					row = append(row, util.JsonString(vd[title]))
				}
			}

			row = append(row, datacell["Url"].(string))
			row = append(row, datacell["ParentUrl"].(string))
			row = append(row, datacell["DownloadTime"].(string))
			sheets[subNamespace].Write(row)
		}
	}
}
開發者ID:Cdim,項目名稱:pholcus,代碼行數:64,代碼來源:output_csv.go

示例12: init

/************************ CSV output ***************************/

// init registers the "csv" output backend. For every rule that has output
// fields it writes one file named
// result/data/<start time>/<name>_<keyword> <sum0>-<sum1> (<rule>).csv
// containing the cells of the batch produced by that rule.
func init() {
	Output["csv"] = func(self *Collector, dataIndex int) {
		// Any panic below (for example a failed type assertion) is logged
		// instead of being propagated.
		defer func() {
			if err := recover(); err != nil {
				Log.Println(err)
			}
		}()

		folder1 := "result/data"
		folder2 := folder1 + "/" + self.startTime.Format("2006年01月02日 15時04分05秒")
		filenameBase := folder2 + "/" + util.FileNameReplace(self.Spider.GetName()+"_"+self.Spider.GetKeyword()+" "+fmt.Sprintf("%v", self.sum[0])+"-"+fmt.Sprintf("%v", self.sum[1]))

		// MkdirAll returns nil when folder2 already exists as a directory.
		if err := os.MkdirAll(folder2, 0777); err != nil {
			Log.Printf("Error: %v\n", err)
		}

		// One file per rule.
		for name, rule := range self.GetRules() {
			fields := rule.GetOutFeild()
			// Skip rules that output nothing.
			if len(fields) == 0 {
				continue
			}

			file, err := os.Create(filenameBase + " (" + util.FileNameReplace(name) + ").csv")
			if err != nil {
				Log.Println(err)
				continue
			}

			file.WriteString("\xEF\xBB\xBF") // write the UTF-8 BOM
			w := csv.NewWriter(file)

			// Build the header in a fresh slice. Appending directly to the
			// slice returned by GetOutFeild could write into a backing array
			// the rule still owns.
			th := make([]string, 0, len(fields)+3)
			th = append(th, fields...)
			th = append(th, "當前鏈接", "上級鏈接", "下載時間")
			w.Write(th)

			for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
				if datacell["RuleName"].(string) != name {
					continue
				}
				// Strings are written as-is, missing/nil values as "", and
				// anything else as its JSON text.
				row := []string{}
				for _, title := range rule.GetOutFeild() {
					vd := datacell["Data"].(map[string]interface{})
					if v, ok := vd[title].(string); ok || vd[title] == nil {
						row = append(row, v)
					} else {
						row = append(row, util.JsonString(vd[title]))
					}
				}

				row = append(row, datacell["Url"].(string))
				row = append(row, datacell["ParentUrl"].(string))
				row = append(row, datacell["DownloadTime"].(string))
				w.Write(row)
			}

			// Flush buffered rows, then close the file.
			w.Flush()
			file.Close()
		}
	}
}
開發者ID:rorovic,項目名稱:pholcus,代碼行數:71,代碼來源:output_csv.go

示例13: init

func init() {
	defer func() {
		// 獲取輸出方式列表
		for out, _ := range Output {
			OutputLib = append(OutputLib, out)
		}
		util.StringsSort(OutputLib)
	}()

	/************************ excel 輸出 ***************************/
	Output["excel"] = func(self *Collector, dataIndex int) {
		defer func() {
			if err := recover(); err != nil {
				Log.Println(err)
			}
		}()

		var file *xlsx.File
		var sheet *xlsx.Sheet
		var row *xlsx.Row
		var cell *xlsx.Cell
		var err error

		folder1 := "result/data"
		folder2 := folder1 + "/" + self.startTime.Format("2006年01月02日 15時04分05秒")
		filename := folder2 + "/" + util.FileNameReplace(self.Spider.GetName()+"_"+self.Spider.GetKeyword()+" "+fmt.Sprintf("%v", self.sum[0])+"-"+fmt.Sprintf("%v", self.sum[1])) + ".xlsx"

		// 創建文件
		file = xlsx.NewFile()

		// 添加分類數據工作表
		for Name, Rule := range self.GetRules() {
			// 跳過不輸出的數據
			if len(Rule.GetOutFeild()) == 0 {
				continue
			}
			// 添加工作表
			sheet = file.AddSheet(util.ExcelSheetNameReplace(Name))
			// 寫入表頭
			row = sheet.AddRow()
			for _, title := range Rule.GetOutFeild() {
				cell = row.AddCell()
				cell.Value = title
			}
			cell = row.AddCell()
			cell.Value = "當前鏈接"
			cell = row.AddCell()
			cell.Value = "上級鏈接"
			cell = row.AddCell()
			cell.Value = "下載時間"

			num := 0 //小計
			for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
				if datacell["RuleName"].(string) == Name {
					row = sheet.AddRow()
					for _, title := range Rule.GetOutFeild() {
						cell = row.AddCell()
						vd := datacell["Data"].(map[string]interface{})
						if v, ok := vd[title].(string); ok || vd[title] == nil {
							cell.Value = v
						} else {
							cell.Value = util.JsonString(vd[title])
						}
					}
					cell = row.AddCell()
					cell.Value = datacell["Url"].(string)
					cell = row.AddCell()
					cell.Value = datacell["ParentUrl"].(string)
					cell = row.AddCell()
					cell.Value = datacell["DownloadTime"].(string)
					num++
				}
			}

			// Log.Printf("[任務:%v | 關鍵詞:%v | 小類:%v] 輸出 %v 條數據!!!\n", self.Spider.GetName(), self.Spider.GetKeyword(), Name, num)

		}

		// 創建/打開目錄
		f2, err := os.Stat(folder2)
		if err != nil || !f2.IsDir() {
			if err := os.MkdirAll(folder2, 0777); err != nil {
				Log.Printf("Error: %v\n", err)
			}
		}

		// 保存文件
		err = file.Save(filename)

		if err != nil {
			Log.Println(err)
		}

	}

	/************************ CSV 輸出 ***************************/
	Output["csv"] = func(self *Collector, dataIndex int) {
		defer func() {
			if err := recover(); err != nil {
				Log.Println(err)
//.........這裏部分代碼省略.........
開發者ID:npk,項目名稱:pholcus-1,代碼行數:101,代碼來源:output_lib.go

示例14: init

/************************ Excel output ***************************/

// init registers the "excel" output backend. It builds one workbook per
// batch, with one sheet per sub-namespace, and saves it as
// config.TEXT_DIR/<start time>/<namespace>__<sum0>-<sum1>.xlsx.
func init() {
	// The registered function returns the error from saving the workbook, or
	// an error wrapping the panic value if anything below panics.
	Output["excel"] = func(self *Collector, dataIndex int) (err error) {
		defer func() {
			if r := recover(); r != nil {
				err = fmt.Errorf("%v", r)
			}
		}()

		workbook := xlsx.NewFile()
		sheets := map[string]*xlsx.Sheet{}

		for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
			sub := util.FileNameReplace(self.subNamespace(datacell))

			// First cell for this sub-namespace: add its sheet and header row.
			if _, known := sheets[sub]; !known {
				sheet, addErr := workbook.AddSheet(sub)
				if addErr != nil {
					// The sheet could not be added: log it and skip this cell.
					logs.Log.Error("%v", addErr)
					continue
				}
				sheets[sub] = sheet

				header := sheet.AddRow()
				for _, title := range self.MustGetRule(datacell["RuleName"].(string)).ItemFields {
					header.AddCell().Value = title
				}
				if self.Spider.OutDefaultField() {
					header.AddCell().Value = "當前鏈接"
					header.AddCell().Value = "上級鏈接"
					header.AddCell().Value = "下載時間"
				}
			}

			// Data row: strings as-is, missing/nil values as "", and anything
			// else as its JSON text.
			line := sheets[sub].AddRow()
			for _, title := range self.MustGetRule(datacell["RuleName"].(string)).ItemFields {
				c := line.AddCell()
				fields := datacell["Data"].(map[string]interface{})
				switch val := fields[title].(type) {
				case nil:
					c.Value = ""
				case string:
					c.Value = val
				default:
					c.Value = util.JsonString(val)
				}
			}
			if self.Spider.OutDefaultField() {
				line.AddCell().Value = datacell["Url"].(string)
				line.AddCell().Value = datacell["ParentUrl"].(string)
				line.AddCell().Value = datacell["DownloadTime"].(string)
			}
		}

		folder := config.TEXT_DIR + "/" + cache.StartTime.Format("2006年01月02日 15時04分05秒")
		filename := fmt.Sprintf("%v/%v__%v-%v.xlsx", folder, util.FileNameReplace(self.namespace()), self.sum[0], self.sum[1])

		// Create the output directory unless it already exists as a directory.
		if info, statErr := os.Stat(folder); statErr != nil || !info.IsDir() {
			if mkErr := os.MkdirAll(folder, 0777); mkErr != nil {
				logs.Log.Error("Error: %v\n", mkErr)
			}
		}

		// Save the workbook; its error is the function's result.
		return workbook.Save(filename)
	}
}
開發者ID:ReinhardHsu,項目名稱:pholcus,代碼行數:74,代碼來源:output_excel.go

示例15: init

/************************ Kafka output ***************************/

// init registers the "kafka" data output backend. Senders are cached per
// topic name in a map shared by every call of the registered function and
// guarded by a read/write mutex.
func init() {
	var (
		senderCache   = map[string]*kafka.KafkaSender{}
		senderCacheMu sync.RWMutex
	)

	// lookupSender returns the cached sender for name, if there is one.
	lookupSender := func(name string) (*kafka.KafkaSender, bool) {
		senderCacheMu.RLock()
		defer senderCacheMu.RUnlock()
		cached, found := senderCache[name]
		return cached, found
	}

	// storeSender records s in the cache under name.
	storeSender := func(name string, s *kafka.KafkaSender) {
		senderCacheMu.Lock()
		defer senderCacheMu.Unlock()
		senderCache[name] = s
	}

	DataOutput["kafka"] = func(self *Collector) error {
		if _, err := kafka.GetProducer(); err != nil {
			return fmt.Errorf("kafka producer失敗: %v", err)
		}

		// Senders used by this call, keyed by topic name.
		local := make(map[string]*kafka.KafkaSender)
		namespace := util.FileNameReplace(self.namespace())

		for _, cell := range self.dataDocker {
			subNamespace := util.FileNameReplace(self.subNamespace(cell))
			topicName := joinNamespaces(namespace, subNamespace)

			sender, known := local[topicName]
			if !known {
				if cached, hit := lookupSender(topicName); hit {
					sender = cached
				} else {
					// No sender for this topic yet: create and cache one.
					sender = kafka.New()
					sender.SetTopic(topicName)
					storeSender(topicName, sender)
				}
				local[topicName] = sender
			}

			// Build the message: strings as-is, missing/nil values as "", and
			// anything else as its JSON text.
			payload := make(map[string]interface{})
			for _, title := range self.MustGetRule(cell["RuleName"].(string)).ItemFields {
				fields := cell["Data"].(map[string]interface{})
				switch val := fields[title].(type) {
				case nil:
					payload[title] = ""
				case string:
					payload[title] = val
				default:
					payload[title] = util.JsonString(val)
				}
			}
			if self.Spider.OutDefaultField() {
				payload["url"] = cell["Url"].(string)
				payload["parent_url"] = cell["ParentUrl"].(string)
				payload["download_time"] = cell["DownloadTime"].(string)
			}

			util.CheckErr(sender.Push(payload))
		}
		return nil
	}
}
開發者ID:henrylee2cn,項目名稱:pholcus,代碼行數:65,代碼來源:output_kafka.go


注:本文中的github.com/henrylee2cn/pholcus/common/util.FileNameReplace函數示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。