from sklearn.datasets import load_svmlight_file
from sklearn import preprocessing
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

# Read the dataset, stored in svmlight / libsvm text format, into a
# feature matrix X and a label vector y.
X, y = load_svmlight_file("dataset1.txt")

# The loader hands X back as a scipy.sparse CSR matrix; densify it into a
# NumPy array before scaling.
X = X.toarray()

# Rescale every feature to the [0, 1] range (MinMaxScaler's default), which
# also keeps the values non-negative for the chi-square test below.
min_max_scaler = preprocessing.MinMaxScaler()
X_scaled = min_max_scaler.fit_transform(X)

# Score the features with the chi-square test and keep the 100 best ones.
selector = SelectKBest(score_func=chi2, k=100)
X_new = selector.fit_transform(X_scaled, y)

# Report the reduced matrix: its shape first, then its contents.
print(X_new.shape)
print(X_new)