. python:
----------------------------------------------- python (type end to exit) ------
>>> print('Hello World!')
Hello World!
>>> end
--------------------------------------------------------------------------------
Just as when using Python directly, indentation is important when entering Python code in Stata.
. python:
----------------------------------------------- python (type end to exit) ------
>>> sum = 0
>>> for i in range(7):
... sum = sum + i
>>> print(sum)
21
>>> end
--------------------------------------------------------------------------------
. python:
----------------------------------------------- python (type end to exit) ------
>>> from functools import reduce
>>> from sfi import Data, Macro
>>>
>>> stata: quietly sysuse auto, clear
>>>
>>> sum = reduce((lambda x, y: x + y), Data.get(var='price'))
>>>
>>> Macro.setLocal('sum', str(sum))
>>> end
--------------------------------------------------------------------------------
. display "sum of var price is : `sum'"
sum of var price is : 456229
. python:
----------------------------------------------- python (type end to exit) ------
>>> sum1 = reduce((lambda x, y: x + y), Data.get(var='rep78'))
>>> sum1
inf
>>> sum2 = reduce((lambda x, y: x + y), Data.get(var='rep78', selectvar=-1))
>>> sum2
235
>>> end
--------------------------------------------------------------------------------
. python:
----------------------------------------------- python (type end to exit) ------
>>> import numpy as np
>>> from sfi import Platform
>>>
>>> import matplotlib
>>> if Platform.isWindows():
... matplotlib.use('TkAgg')
...
>>> import matplotlib.pyplot as plt
>>> from mpl_toolkits import mplot3d
>>> from sfi import Data
>>> end
--------------------------------------------------------------------------------
We use the Stata sandstone example dataset.
. use https://www.stata-press.com/data/r16/sandstone, clear
(Subsea elevation of Lamont sandstone in an area of Ohio)
. * Use sfi to get data from Stata
. python:
----------------------------------------------- python (type end to exit) ------
>>> D = np.array(Data.get("northing easting depth"))
>>> end
--------------------------------------------------------------------------------
We use a triangular mesh to produce the graph.
python:
ax = plt.axes(projection='3d')
plt.xticks(np.arange(60000, 90001, step=10000))
plt.yticks(np.arange(30000, 50001, step=5000))
ax.plot_trisurf(D[:,0], D[:,1], D[:,2], cmap='viridis', edgecolor='none')
plt.savefig("sandstone.png")
end
python:
ax.plot_trisurf(D[:,0], D[:,1], D[:,2],
cmap=plt.cm.Spectral, edgecolor='none')
ax.view_init(30, 60)
plt.savefig("sandstone1.png")
end
We scrape Nasdaq 100 stock tickers with pandas.
python:
import pandas as pd
data = pd.read_html("https://en.wikipedia.org/wiki/NASDAQ-100")
df = data[2]
df = df.drop(df.index[0])
t = df.values.tolist()
end
python:
from sfi import Data
Data.addObs(len(t))
stata: gen company = ""
stata: gen ticker = ""
Data.store(None, range(len(t)), t)
end
. list in 1/5, clean
company ticker
1. Activision Blizzard ATVI
2. Adobe Inc. ADBE
3. Advanced Micro Devices AMD
4. Alexion Pharmaceuticals ALXN
5. Align Technology, Inc. ALGN
We use a Python script to get detailed information of a Nasdaq 100 stock, for example ATVI.
use nas100ticker, clear
quietly describe
frame create detail
forvalues i = 1/`r(N)' {
local a = ticker[`i']
local b detail
python script nas1detail.py, args(`a' `b')
sleep 100
}
frame detail : save nasd100detail.dta, replace
. list ticker open_price open_date close_price close_date in 1/5, clean
ticker open_p~e open_date close_pr~e close_date
1. ADBE $Â 48.33 Jul. 9, 2019 $Â 48.15 Jul. 8, 2019
2. AMD $Â 303.92 Jul. 9, 2019 $Â 303.98 Jul. 8, 2019
3. ALXN $Â 30.87 Jul. 9, 2019 $Â 31.50 Jul. 8, 2019
4. ALGN $Â 132.71 Jul. 9, 2019 $Â 130.30 Jul. 8, 2019
5. GOOGL $Â 274.11 Jul. 9, 2019 $Â 1,116.79 Jul. 8, 2019
program pysvm
version 16
syntax varlist, predict(name)
gettoken label feature : varlist
python: dosvm("`label'", "`feature'", "`predict'")
end
python:
from sfi import Data
import numpy as np
from sklearn.svm import SVC
def dosvm(label, features, predict):
X = np.array(Data.get(features))
y = np.array(Data.get(label))
svc_clf = SVC(gamma='auto')
svc_clf.fit(X, y)
y_pred = svc_clf.predict(X)
Data.addVarByte(predict)
Data.store(predict, None, y_pred)
end
. sysuse auto
(1978 Automobile Data)
. pysvm foreign mpg price, predict(for2)
. label values for2 origin
. tabulate foreign for2, nokey
| for2
Car type | Domestic Foreign | Total
-----------+----------------------+----------
Domestic | 52 0 | 52
Foreign | 0 22 | 22
-----------+----------------------+----------
Total | 52 22 | 74
. pysvm2 foreign mpg price if runiform() <= 0.2
note: training finished successfully
. pysvm2predict for2
. label values for2 origin
. tabulate foreign for2, nokey
| for2
Car type | Domestic Foreign | Total
-----------+----------------------+----------
Domestic | 52 0 | 52
Foreign | 14 8 | 22
-----------+----------------------+----------
Total | 66 8 | 74
program pysvm2
version 16
syntax varlist(min=3) [if] [in]
gettoken label features : varlist
marksample touse
qui count if `touse'
if r(N) == 0 {
di as error "no observations"
exit 2000
}
qui summarize `label' if `touse'
if r(min) >= r(max) {
di as error "outcome does not vary"
exit 2000
}
quietly python: dosvm2("`label'", "`features'", "`touse'")
di as text "note: training finished successfully"
end
python:
import sys
from sfi import Data, Macro
import numpy as np
from sklearn.svm import SVC
import __main__
def dosvm2(label, features, select):
X = np.array(Data.get(features, selectvar=select))
y = np.array(Data.get(label, selectvar=select))
svc_clf = SVC(gamma='auto')
svc_clf.fit(X, y)
__main__.svc_clf = svc_clf
Macro.setGlobal('e(svm_features)', features)
return svc_clf
end
program pysvm2predict
version 16
syntax anything [if] [in]
gettoken newvar rest : anything
if "`rest'" != "" {
exit 198
}
confirm new variable `newvar'
marksample touse
qui count if `touse'
if r(N) == 0 {
di as text "zero observations"
exit 2000
}
qui replace `touse' = _n if `touse' != 0
python: dosvmpredict2("`newvar'", "`touse'")
end
python:
import sys
from sfi import Data, Macro
import numpy as np
from sklearn.svm import SVC
from __main__ import svc_clf
def dosvmpredict2(predict, select):
features = select + " "+ Macro.getGlobal('e(svm_features)')
X = np.array(Data.get(features, selectvar=select))
y_pred = svc_clf.predict(X[:,1:])
y1 = np.reshape(y_pred, (-1,1))
y = np.concatenate((X, y1), axis=1)
Data.addVarDouble(predict)
dim = y.shape[0]
j = y.shape[1]-1
for i in range(dim):
Data.storeAt(predict, y[i,0]-1, y[i,j])
end
In pysvm2.ado ado code:
...
import __main__
...
__main__.svc_clf = svc_clf
...
To access svc_clf in Python routines in ado files:
...
from __main__ import svc_clf
...