. python:
----------------------------------------------- python (type end to exit) -------------------------
>>> print('Hello World!')
Hello World!
>>> end
---------------------------------------------------------------------------------------------------
Stata与其他Python环境一样,输入Python语句需要正确使用“缩进”。
. python:
----------------------------------------------- python (type end to exit) -------------------------
>>> sum = 0
>>> for i in range(7):
... sum = sum + i
>>> print(sum)
21
>>> end
---------------------------------------------------------------------------------------------------
. python:
----------------------------------------------- python (type end to exit) -------------------------
>>> from functools import reduce
>>> from sfi import Data, Macro
>>>
>>> stata: quietly sysuse auto, clear
>>>
>>> sum = reduce((lambda x, y: x + y), Data.get(var='price'))
>>>
>>> Macro.setLocal('sum', str(sum))
>>> end
---------------------------------------------------------------------------------------------------
. display "sum of var price is : `sum'"
sum of var price is : 456229
. python:
----------------------------------------------- python (type end to exit) -------------------------
>>> sum1 = reduce((lambda x, y: x + y), Data.get(var='rep78'))
>>> sum1
inf
>>> sum2 = reduce((lambda x, y: x + y), Data.get(var='rep78', selectvar=-1))
>>> sum2
235
>>> end
---------------------------------------------------------------------------------------------------
. python:
----------------------------------------------- python (type end to exit) -------------------------
>>> import numpy as np
>>> from sfi import Platform
>>>
>>> import matplotlib
>>> if Platform.isWindows():
... matplotlib.use('TkAgg')
...
>>> import matplotlib.pyplot as plt
>>> from mpl_toolkits import mplot3d
>>> from sfi import Data
>>> end
---------------------------------------------------------------------------------------------------
. use https://www.stata-press.com/data/r16/sandstone, clear
(Subsea elevation of Lamont sandstone in an area of Ohio)
. * Use sfi to get data from Stata
. python:
----------------------------------------------- python (type end to exit) -------------------------
>>> D = np.array(Data.get("northing easting depth"))
>>> end
---------------------------------------------------------------------------------------------------
python:
ax = plt.axes(projection='3d')
ax.xaxis.xticks(np.arange(60000, 90001, step=10000))
ax.yaxis.yticks(np.arange(30000, 50001, step=5000))
ax.plot_trisurf(D[:,0], D[:,1], D[:,2], cmap='viridis', edgecolor='none')
plt.savefig("sandstone.png")
end
python:
ax.plot_trisurf(D[:,0], D[:,1], D[:,2],
cmap=plt.cm.Spectral, edgecolor='none')
ax.view_init(30, 60)
plt.savefig("sandstone1.png")
end
python:
import pandas as pd
data = pd.read_html("https://en.wikipedia.org/wiki/NASDAQ-100")
df = data[2]
df = df.drop(df.index[0])
t = df.values.tolist()
end
python:
from sfi import Data
Data.addObs(len(t))
stata: gen company = ""
stata: gen ticker = ""
Data.store(None, range(len(t)), t)
end
. list in 1/5, clean
company ticker
1. Adobe Inc. ADBE
2. Advanced Micro Devices AMD
3. Alexion Pharmaceuticals ALXN
4. Align Technology, Inc. ALGN
5. Alphabet Inc. (Class A) GOOGL
使用Python script获得Nasdaq 100股票具体信息, 例如ATVI.
use nas100ticker, clear
quietly describe
frame create detail
forvalues i = 1/`r(N)' {
local a = ticker[`i']
local b detail
python script nas1detail.py, args(`a' `b')
sleep 100
}
frame detail : save nasd100detail.dta, replace
. list ticker open_price open_date close_price close_date in 1/5, clean
ticker open_price open_date close_pr~e close_date
1. GOOGL $ 1,168.43 Aug. 15, 2019 $ 1,164.25 Aug. 14, 2019
2. GOOG $ 1,164.32 Aug. 15, 2019 $ 1,164.29 Aug. 14, 2019
3. AMZN $ 1,783 Aug. 15, 2019 $ 1,762.96 Aug. 14, 2019
4. AAL $ 26.20 Aug. 15, 2019 $ 26.10 Aug. 14, 2019
5. AMGN $ 200 Aug. 15, 2019 $ 198.87 Aug. 14, 2019
program pysvm
version 16
syntax varlist, predict(name)
gettoken label feature : varlist
python: dosvm("`label'", "`feature'", "`predict'")
end
python:
from sfi import Data
import numpy as np
from sklearn.svm import SVC
def dosvm(label, features, predict):
X = np.array(Data.get(features))
y = np.array(Data.get(label))
svc_clf = SVC(gamma='auto')
svc_clf.fit(X, y)
y_pred = svc_clf.predict(X)
Data.addVarByte(predict)
Data.store(predict, None, y_pred)
end
. sysuse auto, clear
(1978 Automobile Data)
. pysvm foreign mpg price, predict(for2)
. label values for2 origin
. tabulate foreign for2, nokey
| for2
Car type | Domestic Foreign | Total
-----------+----------------------+----------
Domestic | 52 0 | 52
Foreign | 0 22 | 22
-----------+----------------------+----------
Total | 52 22 | 74
. pysvm2 foreign mpg price if runiform() <= 0.2
note: training finished successfully
. pysvm2predict for2
. label values for2 origin
. tabulate foreign for2, nokey
| for2
Car type | Domestic Foreign | Total
-----------+----------------------+----------
Domestic | 52 0 | 52
Foreign | 16 6 | 22
-----------+----------------------+----------
Total | 68 6 | 74
program pysvm2
version 16
syntax varlist(min=3) [if] [in]
gettoken label features : varlist
marksample touse
qui count if `touse'
if r(N) == 0 {
di as error "no observations"
exit 2000
}
qui summarize `label' if `touse'
if r(min) >= r(max) {
di as error "outcome does not vary"
exit 2000
}
quietly python: dosvm2("`label'", "`features'", "`touse'")
di as text "note: training finished successfully"
end
python:
import sys
from sfi import Data, Macro
import numpy as np
from sklearn.svm import SVC
import __main__
def dosvm2(label, features, select):
X = np.array(Data.get(features, selectvar=select))
y = np.array(Data.get(label, selectvar=select))
svc_clf = SVC(gamma='auto')
svc_clf.fit(X, y)
__main__.svc_clf = svc_clf
Macro.setGlobal('e(svm_features)', features)
return svc_clf
end
program pysvm2predict
version 16
syntax anything [if] [in]
gettoken newvar rest : anything
if "`rest'" != "" {
exit 198
}
confirm new variable `newvar'
marksample touse
qui count if `touse'
if r(N) == 0 {
di as text "zero observations"
exit 2000
}
qui replace `touse' = _n if `touse' != 0
python: dosvmpredict2("`newvar'", "`touse'")
end
python:
import sys
from sfi import Data, Macro
import numpy as np
from sklearn.svm import SVC
from __main__ import svc_clf
def dosvmpredict2(predict, select):
features = select + " "+ Macro.getGlobal('e(svm_features)')
X = np.array(Data.get(features, selectvar=select))
y_pred = svc_clf.predict(X[:,1:])
y1 = np.reshape(y_pred, (-1,1))
y = np.concatenate((X, y1), axis=1)
Data.addVarDouble(predict)
dim = y.shape[0]
j = y.shape[1]-1
for i in range(dim):
Data.storeAt(predict, y[i,0]-1, y[i,j])
end
In pysvm2.ado ado code:
...
import __main__
...
__main__.svc_clf = svc_clf
...
To access svc_clf in Python routines in ado files:
...
from __main__ import svc_clf
...
url = "http://www.uone-tech.cn/news/stata16.html"
html = requests.get(url)
html.encoding='utf-8'
text = BeautifulSoup(html.text).get_text()
import jieba
words = jieba.lcut(text)
url = "https://www.stata.com/new-in-stata/python-integration/"
html = requests.get(url)
text = BeautifulSoup(html.text).get_text()
from wordcloud import WordCloud
wordcloud = WordCloud(max_font_size=75,
max_words=100,
background_color="white").generate(text)