From 1e1e48a4aeee76647d8ea6e4829ad0f555bbdb9f Mon Sep 17 00:00:00 2001 From: D-X-Y <280835372@qq.com> Date: Wed, 3 Mar 2021 11:52:10 +0800 Subject: [PATCH] Update notebooks --- .latent-data/init-configs/README.md | 9 + .latent-data/qlib | 2 +- notebooks/Q/qlib-data-play.ipynb | 258 ++++++++++++++++++++++------ notebooks/Q/workflow-demo.py | 41 +++++ 4 files changed, 257 insertions(+), 53 deletions(-) create mode 100644 .latent-data/init-configs/README.md create mode 100644 notebooks/Q/workflow-demo.py diff --git a/.latent-data/init-configs/README.md b/.latent-data/init-configs/README.md new file mode 100644 index 0000000..00c8090 --- /dev/null +++ b/.latent-data/init-configs/README.md @@ -0,0 +1,9 @@ +# Git Commands + +``` +git clone --recurse-submodules git@github.com:D-X-Y/AutoDL-Projects.git + +git submodule init +git submodule update +git pull orign main +``` diff --git a/.latent-data/qlib b/.latent-data/qlib index a96f0c2..d1d7061 160000 --- a/.latent-data/qlib +++ b/.latent-data/qlib @@ -1 +1 @@ -Subproject commit a96f0c2e5fa427ce33660c8791fa2057c6f423fc +Subproject commit d1d70616a3859c0be36236ecd627d07adfb4169b diff --git a/notebooks/Q/qlib-data-play.ipynb b/notebooks/Q/qlib-data-play.ipynb index 5d6e580..59778ae 100644 --- a/notebooks/Q/qlib-data-play.ipynb +++ b/notebooks/Q/qlib-data-play.ipynb @@ -9,10 +9,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "[24128:MainThread](2021-03-01 12:58:36,399) INFO - qlib.Initialization - [config.py:276] - default_conf: client.\n", - "[24128:MainThread](2021-03-01 12:58:36,402) WARNING - qlib.Initialization - [config.py:291] - redis connection failed(host=127.0.0.1 port=6379), cache will not be used!\n", - "[24128:MainThread](2021-03-01 12:58:37,535) INFO - qlib.Initialization - [__init__.py:46] - qlib successfully initialized based on client settings.\n", - "[24128:MainThread](2021-03-01 12:58:37,536) INFO - qlib.Initialization - [__init__.py:47] - data_path=/Users/xuanyidong/.qlib/qlib_data/cn_data\n" + "[82189:MainThread](2021-03-02 21:02:54,241) INFO - qlib.Initialization - [config.py:276] - default_conf: client.\n", + "[82189:MainThread](2021-03-02 21:02:54,255) WARNING - qlib.Initialization - [config.py:291] - redis connection failed(host=127.0.0.1 port=6379), cache will not be used!\n", + "[82189:MainThread](2021-03-02 21:02:54,828) INFO - qlib.Initialization - [__init__.py:46] - qlib successfully initialized based on client settings.\n", + "[82189:MainThread](2021-03-02 21:02:54,829) INFO - qlib.Initialization - [__init__.py:47] - data_path=/Users/xuanyidong/.qlib/qlib_data/cn_data\n" ] } ], @@ -20,72 +20,226 @@ "import os\n", "import sys\n", "import qlib\n", - "qlib.init(provider_uri='~/.qlib/qlib_data/cn_data')" + "import pprint\n", + "import numpy as np\n", + "import pandas as pd\n", + "qlib.init(provider_uri='~/.qlib/qlib_data/cn_data')\n", + "\n", + "from qlib.config import C\n", + "from qlib.data import D\n", + "from qlib.data.data import DatasetD, ExpressionD, Inst, Cal, FeatureD\n", + "from qlib.data.cache import H\n", + "from qlib.data.filter import NameDFilter\n", + "from qlib.utils import code_to_fname, read_bin" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "nameDFilter = NameDFilter(name_rule_re='SH[0-9]{4}55')\n", + "instruments_config = D.instruments(market='csi300', filter_pipe=[nameDFilter])\n", + "instruments = D.list_instruments(instruments=instruments_config,\n", + " start_time='2015-01-01',\n", + " end_time='2016-02-15',\n", + " as_list=True)\n", + "\n", + "fields = ['$close', '$volume', 'Ref($close, 1)', 'Mean($close, 3)', '$high-$low']\n", + "features = D.features(instruments_config, fields, start_time='2010-01-01', end_time='2017-12-31', freq='day')\n", + "print(type(features))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " $close $volume Ref($close, 1) Mean($close, 3) \\\n", + "instrument datetime \n", + "SH600655 2010-01-04 8.934296 47799352.0 8.667867 8.691138 \n", + " 2010-01-05 8.889880 29791234.0 8.934296 8.830681 \n", + " 2010-01-06 8.845468 29002874.0 8.889880 8.889881 \n", + " 2010-01-07 8.553690 38189440.0 8.845468 8.763013 \n", + " 2010-01-08 8.645658 23417642.0 8.553690 8.681605 \n", + "... ... ... ... ... \n", + "SH601555 2017-12-25 1.393481 80615584.0 1.406559 1.408012 \n", + " 2017-12-26 1.406559 64259856.0 1.393481 1.402200 \n", + " 2017-12-27 1.400747 58551256.0 1.406559 1.400262 \n", + " 2017-12-28 1.412371 96204872.0 1.400747 1.406559 \n", + " 2017-12-29 1.412371 52801024.0 1.412371 1.408496 \n", + "\n", + " $high-$low \n", + "instrument datetime \n", + "SH600655 2010-01-04 0.412291 \n", + " 2010-01-05 0.203006 \n", + " 2010-01-06 0.250560 \n", + " 2010-01-07 0.412291 \n", + " 2010-01-08 0.275964 \n", + "... ... \n", + "SH601555 2017-12-25 0.020343 \n", + " 2017-12-26 0.018890 \n", + " 2017-12-27 0.017437 \n", + " 2017-12-28 0.045045 \n", + " 2017-12-29 0.013078 \n", + "\n", + "[2867 rows x 5 columns]\n" + ] + } + ], + "source": [ + "print(features)" ] }, { "cell_type": "code", "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "from qlib.data import D" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(1897,)\n", - "[Timestamp('2010-01-04 00:00:00') Timestamp('2010-01-05 00:00:00')\n", - " Timestamp('2010-01-06 00:00:00') ... Timestamp('2017-12-27 00:00:00')\n", - " Timestamp('2017-12-28 00:00:00') Timestamp('2017-12-29 00:00:00')]\n" - ] - } - ], - "source": [ - "calendar = D.calendar(start_time='2010-01-01', end_time='2017-12-31', freq='day')\n", - "print(calendar.shape)\n", - "print(calendar)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "\n", "\n", "LocalProvider\n", - "\n", - "\n" + "Wrapper(provider=)\n", + "Wrapper(provider=)\n", + "\n", + "LocalDatasetProvider\n", + "--\n", + "Wrapper(provider=)\n", + "\n", + "default_disk_cache: 1\n", + "ExpressionD: Wrapper(provider=)\n", + "FeatureD : \n" ] } ], "source": [ - "from qlib.config import C\n", - "from qlib.\n", + "# Provider:\n", + "print(type(D._provider))\n", "print(type(C))\n", - "# print(C)\n", "print(C.provider)\n", - "print(type(D))\n", - "print(type(D._provider))" + "print(D)\n", + "\n", + "# DatasetD Provider\n", + "print(DatasetD)\n", + "print(DatasetD._provider)\n", + "print(C.dataset_provider)\n", + "\n", + "print('--')\n", + "print(Inst)\n", + "print(Inst._provider)\n", + "\n", + "# Default Disk Cache\n", + "print('default_disk_cache: {:}'.format(C.default_disk_cache))\n", + "print('ExpressionD: {:}'.format(ExpressionD))\n", + "print('FeatureD : {:}'.format(FeatureD._provider))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'pprint' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpprint\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minstruments_config\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0minstruments_d\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDatasetD\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_provider\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_instruments_d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minstruments_config\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfreq\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'day'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpprint\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mpprint\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minstruments_d\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'pprint' is not defined" + ] + } + ], + "source": [ + "pprint.pprint(instruments_config)\n", + "instruments_d = DatasetD._provider.get_instruments_d(instruments_config, freq='day')\n", + "pprint.pprint(instruments_d)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2012-12-31 00:00:00 -> 2019-01-18 00:00:00\n", + "\n", + "[1.1059314, 1.0935822, 1.1059314, 1.0922102, 1.0839773, 1.0839773, 1.0181155,\n", + " 1.0730004, 1.0867218, 1.068884,\n", + " ...\n", + " 1.1163876, 1.1208236, 1.1119517, 1.0986437, 1.1075157, 1.0971651, 1.1149089,\n", + " 1.083857, 1.083857, 1.0956864]\n", + "Length: 1439, dtype: float32\n" + ] + } + ], + "source": [ + "instrument, field, freq = 'SH601555', '$close', 'day'\n", + "all_dates = D.calendar(start_time='2011-12-31', end_time='2019-02-10', freq=freq)\n", + "start_time, end_time = all_dates[0], all_dates[-11]\n", + "print(str(start_time) + ' -> ' + str(end_time))\n", + "obj = ExpressionD.expression(instrument, field, start_time, end_time, freq)\n", + "print(obj.array)\n", + "\n", + "# expression = ExpressionD.get_expression_instance(field)\n", + "# start_time = pd.Timestamp(start_time)\n", + "# end_time = pd.Timestamp(end_time)\n", + "# _, _, start_index, end_index = Cal.locate_index(start_time, end_time, freq='day', future=False)\n", + "# print(start_index)\n", + "# print(end_index)\n", + "\n", + "# fname = code_to_fname(instrument)\n", + "# uri_data = FeatureD._uri_data.format(instrument.lower(), field[1:], freq)\n", + "# print(uri_data)\n", + "# # series = read_bin(uri_data, start_index, end_index)\n", + "# series = read_bin(uri_data, 2850, 2870)\n", + "# print(series)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Wrapper(provider=)\n", + "Wrapper(provider=)\n", + "Wrapper(provider=)\n" + ] + } + ], + "source": [ + "from qlib.data import D\n", + "from qlib.data.data import ExpressionD, Inst\n", + "print(D)\n", + "print(Inst)\n", + "print(ExpressionD)" ] }, { diff --git a/notebooks/Q/workflow-demo.py b/notebooks/Q/workflow-demo.py new file mode 100644 index 0000000..06fcd73 --- /dev/null +++ b/notebooks/Q/workflow-demo.py @@ -0,0 +1,41 @@ +import os +import sys +import qlib +import pprint +import numpy as np +import pandas as pd + +from qlib import config as qconfig +from qlib.utils import init_instance_by_config + +qlib.init(provider_uri='~/.qlib/qlib_data/cn_data', region=qconfig.REG_CN) + +dataset_config = { + "class": "DatasetH", + "module_path": "qlib.data.dataset", + "kwargs": { + "handler": { + "class": "Alpha158", + "module_path": "qlib.contrib.data.handler", + "kwargs": { + "start_time": "2008-01-01", + "end_time": "2020-08-01", + "fit_start_time": "2008-01-01", + "fit_end_time": "2014-12-31", + "instruments": "csi300", + }, + }, + "segments": { + "train": ("2008-01-01", "2014-12-31"), + "valid": ("2015-01-01", "2016-12-31"), + "test": ("2017-01-01", "2020-08-01"), + }, + }, + } + +if __name__ == "__main__": + dataset = init_instance_by_config(dataset_config) + pprint.pprint(dataset_config) + pprint.pprint(dataset) + import pdb; pdb.set_trace() + print('Complete')