-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmatcher_v2.py
64 lines (52 loc) · 3.47 KB
/
matcher_v2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Define a function to load the data
@st.cache
def load_data():
data = pd.read_csv("founders.csv")
return data
# Define a function to rank the potential co-founders based on multiple factors
def rank_founders(founders_df, input_founder):
# Extract the motivation, values, skills, and experience of the input founder
input_motivation = founders_df[founders_df['Founder'] == input_founder]['Motivation'].iloc[0]
input_values = founders_df[founders_df['Founder'] == input_founder]['Values'].iloc[0]
input_skills = founders_df[founders_df['Founder'] == input_founder]['Skills'].iloc[0]
input_experience = founders_df[founders_df['Founder'] == input_founder]['Experiences'].iloc[0]
# Create a new column in the DataFrame that combines the skills, experience, motivation, and values columns
founders_df['skills_experience_motivation_values'] = founders_df['Skills'] + " " + founders_df['Experiences'] + " " + founders_df['Motivation'] + " " + founders_df['Values']
# Define the TF-IDF vectorizer
vectorizer = TfidfVectorizer(stop_words='english')
# Fit and transform the vectorizer on the skills, experience, motivation, and values column
tfidf = vectorizer.fit_transform(founders_df['skills_experience_motivation_values'])
# Calculate the cosine similarity between the input founder and all the other founders
input_index = founders_df[founders_df['Founder'] == input_founder].index[0]
cosine_similarities = cosine_similarity(tfidf[input_index], tfidf)
# Sort the cosine similarities in descending order and extract the top 10 most similar founders
similar_indices = cosine_similarities.argsort()[0][-11:-1]
similar_founders = founders_df.iloc[similar_indices]['Founder'].tolist()
similarities = cosine_similarities[0][similar_indices].tolist()
# Create a dictionary of the ranked co-founders and their similarity scores
ranked_founders = {}
for founder, similarity in zip(similar_founders, similarities):
ranked_founders[founder] = round(similarity*100, 2)
# Calculate the complementary matches based on the input
complementary_matches = []
input_vector = vectorizer.transform([input_skills + " " + input_experience + " " + input_motivation + " " + input_values])
for i, founder in enumerate(similar_founders):
founder_motivation = founders_df[founders_df['Founder'] == founder]['Motivation'].iloc[0]
founder_values = founders_df[founders_df['Founder'] == founder]['Values'].iloc[0]
founder_skills = founders_df[founders_df['Founder'] == founder]['Skills'].iloc[0]
founder_experience = founders_df[founders_df['Founder'] == founder]['Experience'].iloc[0]
founder_vector = vectorizer.transform([founder_skills + " " + founder_experience + " " + founder_motivation + " " + founder_values])
similarity = cosine_similarity(input_vector, founder_vector)[0][0]
complementary_match = (founder, round(similarity*100, 2))
complementary_matches.append(complementary_match)
# Sort the complementary matches by similarity score in descending order
complementary_matches.sort(key=lambda x: x[1], reverse=True)
# Print the complementary matches
print("Complementary Matches:")
for match in complementary_matches:
print("- Founder:", match[0])
print(" Similarity Score:", match[1], "%")