from whatlies import EmbeddingSet
from whatlies.language import SpacyLanguage
# Language backend built from the "en_core_web_md" spaCy model name.
lang = SpacyLanguage("en_core_web_md")

# Words whose embeddings are looked up and compared.
words = [
    "cat",
    "dog",
    "fish",
    "kitten",
    "man",
    "woman",
    "king",
    "queen",
    "doctor",
    "nurse",
]

# Look up every word through `lang[...]` and gather the results into a
# single EmbeddingSet.
emb = EmbeddingSet(*(lang[word] for word in words))

# Interactive plot using the "man" and "woman" entries of the set as the
# x and y axes. Kept as the final bare expression so a notebook cell still
# displays the returned chart.
emb.plot_interactive(x_axis=emb["man"], y_axis=emb["woman"])
import numpy as np
from whatlies.language import BytePairLanguage
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
# Two-step scikit-learn pipeline: the "embed" step is a BytePairLanguage("en")
# instance and the "model" step is a LogisticRegression with default settings.
pipe = Pipeline(
    steps=[
        ("embed", BytePairLanguage("en")),
        ("model", LogisticRegression()),
    ]
)

# Six short example sentences used as training input.
X = [
    "i really like this post",
    "thanks for that comment",
    "i enjoy this friendly forum",
    "this is a bad post",
    "i dislike this article",
    "this is not well written",
]

# One label per sentence, in order: the first three are labelled 1 and the
# last three 0 (presumably positive vs. negative tone -- confirm with author).
y = np.array([1] * 3 + [0] * 3)

# Fit the whole pipeline on the raw sentences. Left as a bare expression so
# a notebook cell still echoes the fitted pipeline.
pipe.fit(X, y)
Pipeline(steps=[('embed', BytePairLanguage(lang='en')),
('model', LogisticRegression())])
(['i really like this post',
'thanks for that comment',
'i enjoy this friendly forum',
'this is a bad post',
'i dislike this article',
'this is not well written'],
array([1, 1, 1, 0, 0, 0]))
/home/manoj/chatgpt/searchexp
['@Tesco your groceries app and mobile site refuse to allow me to log in it’s really frustrating any plans for implementing a fix?',
'@Tesco do you have shops in France?',
'Worst Tesco experience today. No stock of all the essentials, long lines, not enough tills open. @Tesco']
array([[ 0.19568161, -0.00381794],
[ 0.16365132, 0.19927045],
[ 0.15350373, -0.00687524],
...,
[ 0.14391981, -0.02399666],
[ 0.19659102, 0.1060059 ],
[ 0.16526766, 0.0234143 ]])