lecture08 2교시 word2vec 기초실습
from gensim.models.keyedvectors import KeyedVectors
# Load the pretrained Google News word2vec vectors and run the analogy task.
# limit=30000 caps how many vectors are read from the file.
model = KeyedVectors.load_word2vec_format(
    "./data/GoogleNews-vectors-negative300.bin.gz",
    binary=True,
    limit=30000,
)
# The evaluation result is stored in score/predictions; it is not printed in
# this part of the script.
score, predictions = model.evaluate_word_analogies('./data/analogy_task.txt')

# Show the raw embedding stored for "apple".
print(model['apple'])

# model.similarity: similarity score between the vectors of two words.
apple_fruit = model.similarity("apple", "fruit")
print(f"similarity between apple and fruit: {apple_fruit}")
apple_car = model.similarity("apple", "car")
print(f"similarity between apple and car: {apple_car}")

# model.most_similar: the topn nearest words, listed with their scores.
for query_word in ("apple", "car"):
    print(model.most_similar(query_word, topn=10))

# Analogy-style query: add the 'positive' words, subtract the 'negative' ones.
# NOTE(review): 'women' (plural) is combined with 'man' (singular); the usual
# king - man + woman query would use 'woman' -- confirm the intent. Left
# unchanged here so the query keeps matching the output recorded for this run.
analogy_hits = model.most_similar(
    positive=['king', 'women'],
    negative=['man'],
    topn=10,
)
print(analogy_hits)
[-0.06445312 -0.16015625 -0.01208496 0.13476562 -0.22949219 0.16210938
0.3046875 -0.1796875 -0.12109375 0.25390625 -0.01428223 -0.06396484
-0.08056641 -0.05688477 -0.19628906 0.2890625 -0.05151367 0.14257812
...
0.20410156 -0.19628906 -0.35742188 0.35742188 -0.11962891 0.35742188
0.10351562 0.07080078 -0.24707031 -0.10449219 -0.19238281 0.1484375
0.00057983 0.296875 -0.12695312 -0.03979492 0.13183594 -0.16601562
0.125 0.05126953 -0.14941406 0.13671875 -0.02075195 0.34375 ]
similarity between apple and fruit: 0.6410146951675415
similarity between apple and car: 0.12830707430839539
[('apples', 0.720359742641449), ('fruit', 0.641014575958252), ('potato', 0.596093475818634), ('grape', 0.5935864448547363), ('grapes', 0.569225549697876), ('pumpkin', 0.5651882886886597), ('berries', 0.5636081099510193), ('tomato', 0.5608182549476624), ('strawberries', 0.5492885112762451), ('cherry', 0.5369483828544617)]
[('vehicle', 0.7821097373962402), ('cars', 0.7423831224441528), ('SUV', 0.7160962820053101), ('minivan', 0.6907036304473877), ('truck', 0.6735789775848389), ('Car', 0.667760968208313), ('Jeep', 0.651133120059967), ('pickup_truck', 0.64414381980896), ('scooter', 0.638153076171875), ('sedan', 0.6336700916290283)]
[('queen', 0.4827325940132141), ('kings', 0.455863893032074), ('monarch', 0.39624834060668945), ('monarchy', 0.39430153369903564), ('Women', 0.38392186164855957), ('husbands', 0.38302212953567505), ('royal', 0.35934895277023315), ('sexes', 0.35069453716278076), ('females', 0.34812670946121216), ('kingdom', 0.34666597843170166)]