Python normality.stringify方法代码示例

本文整理汇总了Python中normality.stringify方法的典型用法代码示例。如果您正苦于以下问题：Python normality.stringify方法的具体用法？Python normality.stringify怎么用？Python normality.stringify使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块normality的用法示例。

在下文中一共展示了normality.stringify方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: fetch

# 需要导入模块: import normality [as 别名]
# 或者: from normality import stringify [as 别名]
def fetch():
    """Rebuild ``types.json`` from the remote CSV of names and abbreviations.

    Downloads the CSV at ``CSV_URL`` and maps each row's ``Name`` column to
    its ``Abbreviation`` column. Rows where either value stringifies to
    ``None`` are skipped; when a name re-appears with a different
    abbreviation the conflict is printed and the later value wins.

    Every row of the local ``elf-code-list.csv`` (next to this file) is then
    pretty-printed, and the mapping is written as ``{"types": {...}}`` to
    ``../fingerprints/types.json`` relative to this file.
    """
    file_path = os.path.dirname(__file__)
    out_path = os.path.join(file_path, '..', 'fingerprints', 'types.json')
    types = {}
    # Both the HTTP response and its text wrapper are closed on exit, even
    # when reading or parsing fails part-way through.
    with urlopen(CSV_URL) as response, \
            io.TextIOWrapper(response, encoding='utf-8') as text_fh:
        for row in csv.DictReader(text_fh):
            name = stringify(row.get('Name'))
            abbr = stringify(row.get('Abbreviation'))
            if name is None or abbr is None:
                continue
            if name in types and types[name] != abbr:
                # Report conflicting abbreviations; the newer one is kept.
                print(name, types[name], abbr)
            types[name] = abbr

    elf_path = os.path.join(file_path, 'elf-code-list.csv')
    with open(elf_path, 'r') as fh:
        for row in csv.DictReader(fh):
            pprint(dict(row))

    with open(out_path, 'w') as fh:
        json.dump({'types': types}, fh)

开发者ID:alephdata，项目名称:fingerprints，代码行数:25，代码来源:generate.py

示例2: __init__

# 需要导入模块: import normality [as 别名]
# 或者: from normality import stringify [as 别名]
def __init__(self, schema, name, data):
        """Set up a property of ``schema`` from its raw definition.

        ``name`` is passed through ``stringify`` and rejected with
        ``InvalidModel`` when it appears in ``self.RESERVED``. ``data`` is
        the mapping holding the property's settings; its ``type`` entry
        (default ``'string'``) is looked up in ``registry`` and
        ``InvalidModel`` is raised when that lookup yields ``None``.
        """
        self.schema = schema
        self.model = schema.model

        # Validate the name before touching anything in ``data``.
        self.name = stringify(name)
        self.qname = '%s:%s' % (schema.name, self.name)
        if self.name in self.RESERVED:
            raise InvalidModel("Reserved name: %s" % self.name)

        self.data = data
        # The label falls back to the name exactly as it was passed in.
        self._label = data.get('label', name)
        self._description = data.get('description')
        self.hidden = data.get('hidden', False)
        self.stub = data.get('stub', False)

        type_name = data.get('type', 'string')
        self.type = registry.get(type_name)
        if self.type is None:
            raise InvalidModel("Invalid type: %s" % type_name)

        # Unless overridden, matchability is inherited from the type.
        self.matchable = data.get('matchable', self.type.matchable)
        self.reverse = None
        self.range = None

        default_uri = NS[self.qname]
        self.uri = URIRef(data.get('rdf', default_uri))

开发者ID:alephdata，项目名称:followthemoney，代码行数:26，代码来源:property.py

示例3: parse_emails

# 需要导入模块: import normality [as 别名]
# 或者: from normality import stringify [as 别名]
def parse_emails(self, text):
        """Split a raw address list into ``(name, email)`` tuples.

        Each parsed name and e-mail address is also emitted on
        ``self.result`` as a side effect. When nothing can be parsed, the
        input is assumed to be a bare name and ``[(text, None)]`` is
        returned without emitting anything.
        """
        addresses = address.parse_list(safe_string(text))

        # Nothing parseable: treat the whole snippet as a plain name.
        if len(addresses) == 0:
            return [(text, None)]

        pairs = []
        for entry in addresses:
            name = stringify(entry.display_name)
            email = stringify(entry.address)

            if not self.check_email(email):
                email = None

            # A display name that passes the e-mail check stands in for a
            # missing address and is no longer reported as a name.
            if self.check_email(name):
                if not email:
                    email = name
                name = None

            self.result.emit_email(email)
            self.result.emit_name(name)
            pairs.append((name, email))
        return pairs

开发者ID:occrp-attic，项目名称:ingestors，代码行数:27，代码来源:email.py

示例4: pdf_extract_page

# 需要导入模块: import normality [as 别名]
# 或者: from normality import stringify [as 别名]
def pdf_extract_page(self, temp_dir, page):
        """Emit the text of one PDF page, including OCR text of its images.

        The page's images are extracted into a fresh, uniquely named
        directory below ``temp_dir``; every resulting ``*.png`` is run
        through OCR and any text found is appended to the page's lines
        before the combined text is emitted on ``self.result``.
        """
        page_number = page.page_no
        lines = page.lines

        image_dir = os.path.join(temp_dir, str(uuid.uuid4()))
        page.extract_images(path=image_dir.encode('utf-8'), prefix=b'img')
        ocr = get_ocr()
        languages = self.result.ocr_languages
        png_pattern = os.path.join(image_dir, "*.png")
        for image_file in glob.glob(png_pattern):
            with open(image_file, 'rb') as fh:
                data = fh.read()
            recognized = stringify(ocr.extract_text(data, languages=languages))
            if recognized is None:
                continue
            lines.append(recognized)

        page_text = ' \n'.join(lines).strip()
        self.result.emit_page(int(page_number), page_text)

开发者ID:occrp-attic，项目名称:ingestors，代码行数:21，代码来源:pdf.py

示例5: csv_child_iter

# 需要导入模块: import normality [as 别名]
# 或者: from normality import stringify [as 别名]
def csv_child_iter(self, iter, name):
        """Write the rows of ``iter`` to a CSV file and register it as a child.

        The file is created in ``self.work_path`` with all fields quoted.
        If ``iter`` yields no rows a warning is logged and nothing is handed
        on; otherwise the file is passed to ``self.manager.handle_child`` as
        a ``text/csv`` child of ``self.result``.
        """
        out_name = safe_filename(name, default='sheet.csv', extension='csv')
        out_path = join_path(self.work_path, out_name)

        row_count = 0
        with io.open(out_path, 'w', newline='', encoding='utf-8') as fh:
            writer = csv.writer(fh, quoting=csv.QUOTE_ALL)
            # Counting from 1 leaves ``row_count`` at the number of rows.
            for row_count, row in enumerate(iter, 1):
                writer.writerow(row)

        label = stringify(name) or 'sheet'
        if row_count == 0:
            log.warning("Skip [%s]: no rows", label)
            return

        log.info("Generated [%s]: %s, %s rows", label, out_name, row_count)

        self.manager.handle_child(
            self.result,
            out_path,
            id=join_path(self.result.id, label),
            title=label,
            file_name=out_name,
            mime_type='text/csv',
        )

开发者ID:occrp-attic，项目名称:ingestors，代码行数:27，代码来源:csv.py

示例6: document_to_pdf

# 需要导入模块: import normality [as 别名]
# 或者: from normality import stringify [as 别名]
def document_to_pdf(self, file_path, result, work_path, archive):
        """Convert a document to PDF, reusing a cached conversion if present.

        The content hash of an earlier conversion is looked up in Redis
        under a key derived from ``result.checksum``. On a hit the PDF is
        loaded from ``archive``; otherwise the document is converted,
        archived, and its hash is stored under that key.
        ``result.pdf_checksum`` is set in both cases.

        Returns whatever ``archive.load_file`` (cache hit) or
        ``self._document_to_pdf`` (cache miss) returns.
        """
        conn = get_redis()
        key = make_key('pdf', result.checksum)
        # A single GET replaces the former EXISTS + GET pair: it saves a
        # round trip and cannot be tripped up by the key disappearing in
        # between. Assumes ``conn.get`` yields ``None`` for a missing key
        # and that ``stringify`` maps that to ``None`` -- TODO confirm.
        content_hash = stringify(conn.get(key))
        if content_hash is not None:
            # Only announce a cache hit when there is a usable hash.
            log.info("Using [%s] PDF from cache: %s",
                     result.file_name, content_hash)
            result.pdf_checksum = content_hash
            return archive.load_file(content_hash, temp_path=work_path)

        pdf_file = self._document_to_pdf(file_path, result, work_path)
        content_hash = archive.archive_file(pdf_file)
        result.pdf_checksum = content_hash
        conn.set(key, content_hash)
        return pdf_file

开发者ID:occrp-attic，项目名称:ingestors，代码行数:18，代码来源:convert.py

示例7: parse_date

# 需要导入模块: import normality [as 别名]
# 或者: from normality import stringify [as 别名]
def parse_date(date):
    """Convert a ``DD/MM/YYYY`` style value to an ISO formatted string.

    Parts written as ``00`` are treated as unknown:

    * ``DD/MM/YYYY`` becomes ``YYYY-MM-DD``
    * ``00/MM/YYYY`` becomes ``YYYY-MM``
    * ``00/00/YYYY`` has its prefix removed; any value that is four
      characters long after that is returned unchanged

    Returns ``None`` when ``stringify`` yields ``None`` for the input or
    when the value matches none of the forms above.
    """
    value = stringify(date)
    if value is None:
        return None
    value = value.replace('00/00/', '').strip()
    if len(value) == 4:
        return value

    # Each entry pairs a strptime format with how many characters of the
    # ISO date to keep (``None`` keeps the full date).
    attempts = (
        ('%d/%m/%Y', None),
        ('00/%m/%Y', 7),
    )
    for fmt, keep in attempts:
        try:
            parsed = datetime.strptime(value, fmt)
        except ValueError:
            # Not in this format; try the next one.
            continue
        return parsed.date().isoformat()[:keep]
    return None

开发者ID:alephdata，项目名称:opensanctions，代码行数:20，代码来源:gb_hmt_sanctions.py

示例8: split_items

# 需要导入模块: import normality [as 别名]
# 或者: from normality import stringify [as 别名]
def split_items(text):
    """Split ``text`` on ``)`` and strip a numeric marker from each piece.

    A piece without a space is kept as is. Otherwise the piece is divided
    at its first space; if the remainder, with any ``(`` removed, parses as
    an integer, only the part before the space is kept, else the whole
    piece is kept. Returns an empty list when ``stringify`` yields ``None``.
    """
    text = stringify(text)
    if text is None:
        return []

    results = []
    for chunk in text.split(')'):
        head, space, tail = chunk.partition(' ')
        if not space:
            results.append(chunk)
            continue

        try:
            int(tail.replace('(', ''))
        except Exception:
            results.append(chunk)
        else:
            results.append(head)
    return results

开发者ID:alephdata，项目名称:opensanctions，代码行数:20，代码来源:gb_hmt_sanctions.py

示例9: generate

# 需要导入模块: import normality [as 别名]
# 或者: from normality import stringify [as 别名]
def generate(text, keep_order=False, keep_brackets=False):
    """Reduce ``text`` to a cleaned, lower-cased form.

    With ``keep_brackets`` left off, bracketed passages are replaced by
    whitespace first. With ``keep_order`` left off, the remaining tokens
    are de-duplicated and sorted; otherwise only runs of whitespace are
    collapsed. Returns ``None`` when ``stringify`` yields ``None`` or when
    nothing is left after cleaning.
    """
    text = stringify(text)
    if text is None:
        return None

    # Lower-casing has to come before any of the replacements.
    text = clean_entity_name(text.lower())

    if not keep_brackets:
        # Drop bracketed text, e.g. the jurisdiction in a company name
        # such as: Turtle Management (Seychelles) Ltd.
        text = BRACKETED.sub(WS, text)

    # Strict scrubbing, then type replacement.
    text = replace_types(clean_strict(text))

    if keep_order:
        text = collapse_spaces(text)
    else:
        # Final pass, based on the OpenRefine algorithm: unique tokens in
        # sorted order.
        tokens = {token for token in text.split(WS) if len(token)}
        text = WS.join(sorted(tokens))

    if len(text) == 0:
        return None
    return text

开发者ID:alephdata，项目名称:fingerprints，代码行数:32，代码来源:generate.py

示例10: test_stringify

# 需要导入模块: import normality [as 别名]
# 或者: from normality import stringify [as 别名]
def test_stringify(self):
        """stringify() trims a padded string and renders numbers as text."""
        cases = (
            ('.', ' . '),
            ('5', 5),
            ('0.5', 0.5),
        )
        for expected, value in cases:
            self.assertEqual(expected, stringify(value))

开发者ID:pudo，项目名称:normality，代码行数:6，代码来源:test_normality.py

示例11: test_stringify_datetime

# 需要导入模块: import normality [as 别名]
# 或者: from normality import stringify [as 别名]
def test_stringify_datetime(self):
        """A stringified datetime starts with its year followed by a dash."""
        now = datetime.utcnow()
        rendered = stringify(now)
        year_prefix = '%s-' % now.year
        self.assertTrue(rendered.startswith(year_prefix), rendered)

开发者ID:pudo，项目名称:normality，代码行数:6，代码来源:test_normality.py

示例12: test_petro_iso_encoded

# 需要导入模块: import normality [as 别名]
# 或者: from normality import stringify [as 别名]
def test_petro_iso_encoded(self):
        """ISO-8859-5 encoded bytes are stringified back to the source text."""
        expected = u'Порошенко Петро Олексійович'
        raw_bytes = expected.encode('iso-8859-5')
        self.assertEqual(expected, stringify(raw_bytes))

开发者ID:pudo，项目名称:normality，代码行数:7，代码来源:test_normality.py

示例13: test_petro_utf16_encoded

# 需要导入模块: import normality [as 别名]
# 或者: from normality import stringify [as 别名]
def test_petro_utf16_encoded(self):
        """Check that UTF-16 encoded bytes are stringified back to the source text."""
        text = u'Порошенко Петро Олексійович'
        encoded = text.encode('utf-16')
        out = stringify(encoded)
        self.assertEqual(text, out)