context
It seems like the language identification utility does not work anymore due to some pickle compatibility issue.
steps to reproduce
In [1]: import textacy
In [2]: textacy.lang_utils.identify_lang("Hello kid")
22%|██████████████████████████████████████████████████████████████▎ | 9.00/40.0 [00:00<00:00, 2.35kB/s]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-2-9bb21112c695> in <module>
----> 1 textacy.lang_utils.identify_lang("Hello kid")
~/anaconda3/envs/gdpr/lib/python3.7/site-packages/textacy/lang_utils.py in identify_lang(self, text)
154 text_ = utils.to_collection(text[:self.max_text_len], str, list)
155 if self._is_valid(text_[0]):
--> 156 lang = self.pipeline.predict(text_).item()
157 return lang
158 else:
~/anaconda3/envs/gdpr/lib/python3.7/site-packages/textacy/lang_utils.py in pipeline(self)
98 def pipeline(self):
99 if not self._pipeline:
--> 100 self._pipeline = self._load_pipeline()
101 return self._pipeline
102
~/anaconda3/envs/gdpr/lib/python3.7/site-packages/textacy/lang_utils.py in _load_pipeline(self)
106 self.download()
107 with filepath.open(mode="rb") as f:
--> 108 pipeline = joblib.load(f)
109 return pipeline
110
~/anaconda3/envs/gdpr/lib/python3.7/site-packages/joblib/numpy_pickle.py in load(filename, mmap_mode)
593 filename = getattr(fobj, 'name', '')
594 with _read_fileobject(fobj, filename, mmap_mode) as fobj:
--> 595 obj = _unpickle(fobj)
596 else:
597 with open(filename, 'rb') as f:
~/anaconda3/envs/gdpr/lib/python3.7/site-packages/joblib/numpy_pickle.py in _unpickle(fobj, filename, mmap_mode)
527 obj = None
528 try:
--> 529 obj = unpickler.load()
530 if unpickler.compat_mode:
531 warnings.warn("The file '%s' has been generated with a "
~/anaconda3/envs/gdpr/lib/python3.7/pickle.py in load(self)
1086 raise EOFError
1087 assert isinstance(key, bytes_types)
-> 1088 dispatch[key[0]](self)
1089 except _Stop as stopinst:
1090 return stopinst.value
~/anaconda3/envs/gdpr/lib/python3.7/pickle.py in load_obj(self)
1351 def load_obj(self):
1352 # Stack is ... markobject classobject arg1 arg2 ...
-> 1353 args = self.pop_mark()
1354 cls = args.pop(0)
1355 self._instantiate(cls, args)
~/anaconda3/envs/gdpr/lib/python3.7/pickle.py in pop_mark(self)
1093 def pop_mark(self):
1094 items = self.stack
-> 1095 self.stack = self.metastack.pop()
1096 self.append = self.stack.append
1097 return items
IndexError: pop from empty list
expected vs. actual behavior
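Expected: identify_lang returns the language code of the given text. Actual: no language code is returned; the call raises "IndexError: pop from empty list" while joblib unpickles the language identification pipeline.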
environment
In [9]: textacy.utils.get_config()
Out[9]:
{'platform': 'linux',
'python': '3.6.10 |Anaconda, Inc.| (default, Jan 7 2020, 21:14:29) \n[GCC 7.3.0]',
'spacy': '2.2.3',
'spacy_models': ['de',
'en',
'xx',
'el',
'nl',
'fr',
'nb',
'es',
'it',
'lt',
'pt'],
'textacy': '0.9.1'}
context
It seems like the language identification utility does not work anymore
due to some pickle compatibility issue.
steps to reproduce
expected vs. actual behavior
Expected: the language code of the given text is returned. Actual: no language code is returned; an IndexError is raised instead.
environment