All APIs are listed in etl_utils/__init__.py.
Install it.
pip install etl_utils
Import it.
from etl_utils import * # increases memory usage by only 6 MB.
from etl_utils import process_notifier
import time
for i1 in process_notifier(iteratable_object, msg=u"RANGE"):
# process(i1)
time.sleep(0.005)
# Example output is:
# [pid 17510] RANGE processing 500 records 100% |###################################################################################################################| 166.61 items/s
Requirements about iteratable_object:
- An iterable data structure, e.g. a generator, a list-like or dict-like object, any ORM query, or a file object.
- There should be a way to fetch the total count of this iteratable_object, but this is optional for a lazy generator.
Python's default print function can only deal with basic unicode; it cannot display the Chinese
unicode inside a nested dict or list. So let's hand this functionality over to uprint.
But remember that values of string type must be converted to unicode type first, or the output will be garbled.
Example:
>>> print({u"你好":u"世界"})
{u'\u4f60\u597d': u'\u4e16\u754c'}
>>> from etl_utils import uprint
>>> tmp = uprint({u"你好":u"世界"})
{u'你好': u'世界'}
>>>
cpickle_cache(cache_file_path, generate_data_func)
Generate cache data if cache_file_path does not exist.
Turn a function into a property.
class Universe:
@cached_property
def answer(self):
return 42
answer = Universe().answer # no ()
assert answer == 42  # True
Similar to cached_property, but it's a property on the class itself.
The singleton pattern restricts the instantiation of a class to one object; see Wikipedia for more information.
@singleton() # or @singleton(multi_init=True)
class MySingleton(object):
@cached_property
def heavy_cpu(self):
# process ...
return cached_data
def another_function(self, params):
return process(params)
o1 = MySingleton()
o2 = MySingleton()
assert o1 is o2  # True
Re-importing the MySingleton package will not cause the MySingleton class to be initialized twice, so you
can encapsulate a series of functions and data into the MySingleton class.
This function is thread-safe, and is imported from https://pypi.python.org/pypi/pysingleton .
ListUtils.most_common_inspect(list1)
ListUtils.uniq_seqs(seqs, uniq_lambda=None)
StringUtils.merge(*strs)
StringUtils.calculate_text_similarity(text1, text2, inspect=False, similar_rate_baseline=0.0, skip_special_chars=False)
StringUtils.frequence_chars_info(str1, length_lambda=lambda len1 : len1)
DictUtils.nested_read(dict1, keys, default_val=None)
DictUtils.add_default_value(dict1, default_value=None)
UnicodeUtils.is_chinese(uchar)
UnicodeUtils.is_number(uchar)
UnicodeUtils.is_alphabet(uchar)
UnicodeUtils.is_other(uchar)
UnicodeUtils.B2Q(uchar)
UnicodeUtils.is_Q(uchar)
UnicodeUtils.Q2B(uchar)
UnicodeUtils.stringQ2B(ustring, convert_strs={})
UnicodeUtils.uniform(ustring)
UnicodeUtils.string2List(ustring)
UnicodeUtils.ljust(str1, width, fillchar=' ')
UnicodeUtils.rjust(str1, width, fillchar=' ')
UnicodeUtils.just_str(self)
UnicodeUtils.read(filename)
HashUtils.hashvalue_with_sorted(str1)
ItertoolsUtils.split_seqs_by_size(seqs1, size1, inspect=False)
JsonUtils.unicode_dump(item1)
Generated by ruby generate_api_doc.rb
Load data only when needed.
from etl_utils import ld
ld.en_us_dict
ld.two_length_words
ld.regular_words
ld.lemmatize(word1)
ld.tagged_words__dict
ld.jieba
from etl_utils import regexp
regexp.alphabet
regexp.word
regexp.upper
regexp.object_id
regexp.special_chars
# The `slots_with_pickle` decorator adds `__slots__` to these classes, which can
# dramatically reduce the memory footprint and improve execution speed
# by eliminating the instance dictionary.
# It is also possible to pickle/unpickle the objects.
@slots_with_pickle('attr_a', 'attr_b', 'attr_c')
class Slots(object):
def __init__(self):
self.attr_a = 'a'
self.attr_b = 'b'
self.attr_c = 'c'
calculate_entropy(feature_with_count_dict)
is_nltk_word(str1) # is valid English word
extract_words(sentence)
ItemIncrementIdDict # Assign an auto increment integer to item, e.g. an object_id
ItemsGroupAndIndexes # group result
MarkObjectIds # mark processed objects group
# Sequentially process each lambda in `lambdas`, and return the first one that raises no exception.
set_default_value(lambdas, msg=u"")
pip install -r requirements.txt
pip install nose
nosetests
MIT. David Chen @ 17zuoye.