update
@@ -0,0 +1,47 @@
|
||||
trigger: none
|
||||
|
||||
pr:
|
||||
autoCancel: true
|
||||
drafts: false
|
||||
branches:
|
||||
include:
|
||||
- master
|
||||
paths:
|
||||
include:
|
||||
- neural_compressor
|
||||
- setup.py
|
||||
- requirements.txt
|
||||
- .azure-pipelines/code-scan.yml
|
||||
- .azure-pipelines/scripts/codeScan
|
||||
- .azure-pipelines/template/docker-template.yml
|
||||
|
||||
pool:
|
||||
vmImage: "ubuntu-latest"
|
||||
|
||||
variables:
|
||||
CODE_SCAN_LOG_PATH: ".azure-pipelines/scripts/codeScan/scanLog"
|
||||
|
||||
stages:
|
||||
- stage: DocStyleCodeScan
|
||||
displayName: DocStyle Code Scan
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job: DocStyle
|
||||
displayName: DocStyle
|
||||
steps:
|
||||
- template: template/code-scan-template.yml
|
||||
parameters:
|
||||
codeScanFileName: "pydocstyle"
|
||||
uploadPath: "pydocstyle.log"
|
||||
|
||||
- stage: BanditCodeScan
|
||||
displayName: Bandit Code Scan
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job: Bandit
|
||||
displayName: Bandit
|
||||
steps:
|
||||
- template: template/code-scan-template.yml
|
||||
parameters:
|
||||
codeScanFileName: "bandit"
|
||||
uploadPath: "bandit.log"
|
@@ -0,0 +1,45 @@
|
||||
#
|
||||
# Copyright (c) 2022 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
ARG UBUNTU_VER=22.04
|
||||
FROM ubuntu:${UBUNTU_VER} as devel
|
||||
|
||||
# See http://bugs.python.org/issue19846
|
||||
ENV LANG C.UTF-8
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
|
||||
python3 \
|
||||
python3-pip \
|
||||
python3-dev \
|
||||
python3-distutils \
|
||||
autoconf \
|
||||
build-essential \
|
||||
git \
|
||||
libgl1-mesa-glx \
|
||||
libglib2.0-0 \
|
||||
numactl \
|
||||
time \
|
||||
wget \
|
||||
bc \
|
||||
vim
|
||||
|
||||
RUN ln -sf $(which python3) /usr/bin/python
|
||||
|
||||
RUN python -m pip install pip==24.0
|
||||
RUN python -m pip install --no-cache-dir setuptools
|
||||
|
||||
RUN pip list
|
||||
|
||||
WORKDIR /
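
# Illustrative local build of this image (the tag name is an assumption; in CI the
# image is built through .azure-pipelines/template/docker-template.yml):
#   docker build --build-arg UBUNTU_VER=22.04 -t neural-compressor-devel .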
|
||||
|
@@ -0,0 +1,38 @@
|
||||
#
|
||||
# Copyright (c) 2022 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
ARG UBUNTU_VER=22.04
|
||||
FROM ubuntu:${UBUNTU_VER} as devel
|
||||
|
||||
# See http://bugs.python.org/issue19846
|
||||
ENV LANG C.UTF-8
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
|
||||
aspell \
|
||||
aspell-en \
|
||||
python3 \
|
||||
python3-pip \
|
||||
python3-dev \
|
||||
python3-distutils \
|
||||
wget
|
||||
|
||||
RUN ln -sf $(which python3) /usr/bin/python
|
||||
|
||||
RUN python -m pip install --no-cache-dir \
|
||||
bandit\
|
||||
pyspelling\
|
||||
pydocstyle
|
||||
|
||||
WORKDIR /
|
@@ -0,0 +1,119 @@
|
||||
trigger: none
|
||||
|
||||
pr:
|
||||
autoCancel: true
|
||||
drafts: false
|
||||
branches:
|
||||
include:
|
||||
- master
|
||||
paths:
|
||||
include:
|
||||
- neural_compressor/common
|
||||
- neural_compressor/torch
|
||||
- examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only
|
||||
- setup.py
|
||||
- requirements_pt.txt
|
||||
- .azure-pipelines/scripts/models
|
||||
- .azure-pipelines/model-test-3x.yml
|
||||
- .azure-pipelines/template/docker-template.yml
|
||||
|
||||
variables:
|
||||
OUT_SCRIPT_PATH: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models
|
||||
SCRIPT_PATH: /neural-compressor/.azure-pipelines/scripts
|
||||
|
||||
parameters:
|
||||
- name: PyTorch_Model_3X
|
||||
displayName: Run PyTorch models?
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
- name: PyTorchModelList
|
||||
type: object
|
||||
default:
|
||||
- opt_125m_woq_gptq_int4
|
||||
- opt_125m_woq_gptq_nf4_dq_bnb
|
||||
- opt_125m_woq_gptq_int4_dq_ggml
|
||||
|
||||
stages:
|
||||
- stage: PyTorchModels
|
||||
displayName: Run PyTorch Model
|
||||
pool: ICX-16C
|
||||
dependsOn: []
|
||||
condition: and(succeeded(), eq('${{ parameters.PyTorch_Model_3X }}', 'true'))
|
||||
jobs:
|
||||
- ${{ each model in parameters.PyTorchModelList }}:
|
||||
- job:
|
||||
displayName: ${{ model }}
|
||||
steps:
|
||||
- template: template/model-template.yml
|
||||
parameters:
|
||||
modelName: ${{ model }}
|
||||
framework: "pytorch"
|
||||
APIVersion: "3x"
|
||||
|
||||
- stage: GenerateLogs
|
||||
displayName: Generate Report
|
||||
pool:
|
||||
vmImage: "ubuntu-latest"
|
||||
dependsOn: [PyTorchModels]
|
||||
jobs:
|
||||
- job: GenerateReport
|
||||
steps:
|
||||
- script: |
|
||||
echo ${BUILD_SOURCESDIRECTORY}
|
||||
rm -fr ${BUILD_SOURCESDIRECTORY} || sudo rm -fr ${BUILD_SOURCESDIRECTORY} || true
|
||||
echo y | docker system prune --all
|
||||
displayName: "Clean workspace"
|
||||
- checkout: self
|
||||
clean: true
|
||||
displayName: "Checkout out Repo"
|
||||
- task: DownloadPipelineArtifact@2
|
||||
inputs:
|
||||
artifact:
|
||||
patterns: "**/*_summary.log"
|
||||
path: $(OUT_SCRIPT_PATH)
|
||||
- task: DownloadPipelineArtifact@2
|
||||
inputs:
|
||||
artifact:
|
||||
patterns: "**/*_tuning_info.log"
|
||||
path: $(OUT_SCRIPT_PATH)
|
||||
- task: UsePythonVersion@0
|
||||
displayName: "Use Python 3.10"
|
||||
inputs:
|
||||
versionSpec: "3.10"
|
||||
- script: |
|
||||
cd ${OUT_SCRIPT_PATH}
|
||||
mkdir generated
|
||||
mkdir last_generated
|
||||
pip install requests
|
||||
python -u collect_log_all.py --logs_dir $(OUT_SCRIPT_PATH) --output_dir generated --build_id=$(Build.BuildId)
|
||||
displayName: "Collect all logs"
|
||||
- task: DownloadPipelineArtifact@2
|
||||
continueOnError: true
|
||||
inputs:
|
||||
source: "specific"
|
||||
artifact: "FinalReport"
|
||||
patterns: "**.log"
|
||||
path: $(OUT_SCRIPT_PATH)/last_generated
|
||||
project: $(System.TeamProject)
|
||||
pipeline: "Model-Test"
|
||||
runVersion: "specific"
|
||||
runId: $(refer_buildId)
|
||||
displayName: "Download last logs"
|
||||
- script: |
|
||||
echo "------ Generating final report.html ------"
|
||||
cd ${OUT_SCRIPT_PATH}
|
||||
/usr/bin/bash generate_report.sh --WORKSPACE generated --output_dir generated --last_logt_dir last_generated
|
||||
displayName: "Generate report"
|
||||
- task: PublishPipelineArtifact@1
|
||||
inputs:
|
||||
targetPath: $(OUT_SCRIPT_PATH)/generated
|
||||
artifact: FinalReport
|
||||
publishLocation: "pipeline"
|
||||
displayName: "Publish report"
|
||||
- script: |
|
||||
if [ $(is_perf_reg) == 'true' ]; then
|
||||
echo "Some benchmark regression occurred or the reference data need to be updated, please check artifacts and reports."
|
||||
exit 1
|
||||
fi
|
||||
displayName: "Specify regression"
|
@@ -0,0 +1,173 @@
|
||||
trigger: none
|
||||
|
||||
pr:
|
||||
autoCancel: true
|
||||
drafts: false
|
||||
branches:
|
||||
include:
|
||||
- master
|
||||
paths:
|
||||
include:
|
||||
- neural_compressor
|
||||
- setup.py
|
||||
- requirements.txt
|
||||
- .azure-pipelines/model-test.yml
|
||||
- .azure-pipelines/template/docker-template.yml
|
||||
- .azure-pipelines/scripts/models
|
||||
- examples/tensorflow/oob_models/quantization/ptq
|
||||
- .azure-pipelines/scripts/fwk_version.sh
|
||||
- .azure-pipelines/scripts/install_nc.sh
|
||||
exclude:
|
||||
- test
|
||||
- neural_compressor/common
|
||||
- neural_compressor/torch
|
||||
- neural_compressor/tensorflow
|
||||
- neural_compressor/onnxrt
|
||||
|
||||
pool: MODEL_PERF_TEST_TF
|
||||
|
||||
variables:
|
||||
OUT_SCRIPT_PATH: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models
|
||||
SCRIPT_PATH: /neural-compressor/.azure-pipelines/scripts
|
||||
|
||||
parameters:
|
||||
- name: TensorFlow_Model
|
||||
displayName: Run TensorFlow models?
|
||||
type: boolean
|
||||
default: true
|
||||
- name: PyTorch_Model
|
||||
displayName: Run PyTorch models?
|
||||
type: boolean
|
||||
default: true
|
||||
- name: ONNX_Model
|
||||
displayName: Run ONNX models?
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
- name: TensorFlowModelList
|
||||
type: object
|
||||
default:
|
||||
- resnet50v1.5
|
||||
- ssd_resnet50_v1
|
||||
- name: PyTorchModelList
|
||||
type: object
|
||||
default:
|
||||
- resnet18_fx
|
||||
- name: ONNXModelList
|
||||
type: object
|
||||
default:
|
||||
- resnet50-v1-12
|
||||
|
||||
stages:
|
||||
- stage: TensorFlowModels
|
||||
displayName: Run TensorFlow Model
|
||||
pool: MODEL_PERF_TEST
|
||||
dependsOn: []
|
||||
condition: and(succeeded(), eq('${{ parameters.TensorFlow_Model }}', 'true'))
|
||||
jobs:
|
||||
- ${{ each model in parameters.TensorFlowModelList }}:
|
||||
- job:
|
||||
displayName: ${{ model }}
|
||||
steps:
|
||||
- template: template/model-template.yml
|
||||
parameters:
|
||||
modelName: ${{ model }}
|
||||
framework: "tensorflow"
|
||||
|
||||
- stage: PyTorchModels
|
||||
displayName: Run PyTorch Model
|
||||
pool: MODEL_PERF_TEST
|
||||
dependsOn: []
|
||||
condition: and(succeeded(), eq('${{ parameters.PyTorch_Model }}', 'true'))
|
||||
jobs:
|
||||
- ${{ each model in parameters.PyTorchModelList }}:
|
||||
- job:
|
||||
displayName: ${{ model }}
|
||||
steps:
|
||||
- template: template/model-template.yml
|
||||
parameters:
|
||||
modelName: ${{ model }}
|
||||
framework: "pytorch"
|
||||
|
||||
- stage: ONNXModels
|
||||
displayName: Run ONNX Model
|
||||
pool: MODEL_PERF_TEST
|
||||
dependsOn: []
|
||||
condition: and(succeeded(), eq('${{ parameters.ONNX_Model }}', 'true'))
|
||||
jobs:
|
||||
- ${{ each model in parameters.ONNXModelList }}:
|
||||
- job:
|
||||
displayName: ${{ model }}
|
||||
steps:
|
||||
- template: template/model-template.yml
|
||||
parameters:
|
||||
modelName: ${{ model }}
|
||||
framework: "onnxrt"
|
||||
|
||||
- stage: GenerateLogs
|
||||
displayName: Generate Report
|
||||
pool:
|
||||
vmImage: "ubuntu-latest"
|
||||
dependsOn: [TensorFlowModels, PyTorchModels, ONNXModels]
|
||||
jobs:
|
||||
- job: GenerateReport
|
||||
steps:
|
||||
- script: |
|
||||
echo ${BUILD_SOURCESDIRECTORY}
|
||||
rm -fr ${BUILD_SOURCESDIRECTORY} || sudo rm -fr ${BUILD_SOURCESDIRECTORY} || true
|
||||
echo y | docker system prune --all
|
||||
displayName: "Clean workspace"
|
||||
- checkout: self
|
||||
clean: true
|
||||
displayName: "Checkout out Repo"
|
||||
- task: DownloadPipelineArtifact@2
|
||||
inputs:
|
||||
artifact:
|
||||
patterns: "**/*_summary.log"
|
||||
path: $(OUT_SCRIPT_PATH)
|
||||
- task: DownloadPipelineArtifact@2
|
||||
inputs:
|
||||
artifact:
|
||||
patterns: "**/*_tuning_info.log"
|
||||
path: $(OUT_SCRIPT_PATH)
|
||||
- task: UsePythonVersion@0
|
||||
displayName: "Use Python 3.10"
|
||||
inputs:
|
||||
versionSpec: "3.10"
|
||||
- script: |
|
||||
cd ${OUT_SCRIPT_PATH}
|
||||
mkdir generated
|
||||
mkdir last_generated
|
||||
pip install requests
|
||||
python -u collect_log_all.py --logs_dir $(OUT_SCRIPT_PATH) --output_dir generated --build_id=$(Build.BuildId)
|
||||
displayName: "Collect all logs"
|
||||
- task: DownloadPipelineArtifact@2
|
||||
continueOnError: true
|
||||
inputs:
|
||||
source: "specific"
|
||||
artifact: "FinalReport"
|
||||
patterns: "**.log"
|
||||
path: $(OUT_SCRIPT_PATH)/last_generated
|
||||
project: $(System.TeamProject)
|
||||
pipeline: "Model-Test"
|
||||
runVersion: "specific"
|
||||
runId: $(refer_buildId)
|
||||
displayName: "Download last logs"
|
||||
- script: |
|
||||
echo "------ Generating final report.html ------"
|
||||
cd ${OUT_SCRIPT_PATH}
|
||||
/usr/bin/bash generate_report.sh --WORKSPACE generated --output_dir generated --last_logt_dir last_generated
|
||||
displayName: "Generate report"
|
||||
- task: PublishPipelineArtifact@1
|
||||
inputs:
|
||||
targetPath: $(OUT_SCRIPT_PATH)/generated
|
||||
artifact: FinalReport
|
||||
publishLocation: "pipeline"
|
||||
displayName: "Publish report"
|
||||
- script: |
|
||||
if [ $(is_perf_reg) == 'true' ]; then
|
||||
echo "Some benchmark regression occurred or the reference data need to be updated, please check artifacts and reports."
|
||||
exit 1
|
||||
fi
|
||||
displayName: "Specify regression"
|
@@ -0,0 +1,81 @@
|
||||
#!/bin/bash
|
||||
|
||||
# -------------- general approach start----------------
|
||||
|
||||
# 1. import this file:
|
||||
# source path/change_color.sh
|
||||
# 2. use COLOR/BG:
|
||||
# $VARIABLE_NAME && out_put_content && $RESET
|
||||
# 3. COLOR + BG:
|
||||
# $COLOR/BG_VARIABLE_NAME && $BG/COLOR_VARIABLE_NAME && out_put_content && $RESET
|
||||
# 4. custom
|
||||
#     compose your own escape code by changing the numbers:
|
||||
# txt number range (30, 37)
|
||||
# bg number range (40, 47)
|
||||
# special effects number range (1, 7)
|
||||
#     echo -en "\\E[number1;number2;number3m"
|
||||
# e.g - BG_GRAY+LIGHT_RED = "echo -en \\E[47;31m"
|
||||
|
||||
# -------------- general approach end----------------
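#
# Illustrative usage, assuming this file has been sourced so the variables
# defined below are available:
#   $BOLD_RED && echo "something went wrong" && $RESET
#   $BG_GRAY && $LIGHT_RED && echo "highlighted warning" && $RESET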
|
||||
|
||||
|
||||
# general setting
|
||||
# ------------- light_color start----------------
|
||||
# black
|
||||
LIGHT_BLACK="echo -en \\E[30m"
|
||||
# red
|
||||
LIGHT_RED="echo -en \\E[31m"
|
||||
# green
|
||||
LIGHT_GREEN="echo -en \\E[32m"
|
||||
# yellow
|
||||
LIGHT_YELLOW="echo -en \\E[33m"
|
||||
# blue
|
||||
LIGHT_BLUE="echo -en \\E[34m"
|
||||
# purple
|
||||
LIGHT_PURPLE="echo -en \\E[35m"
|
||||
# cyan
|
||||
LIGHT_CYAN="echo -en \\E[36m"
|
||||
# gray
|
||||
LIGHT_GRAY="echo -en \\E[37m"
|
||||
# ------------- light_color end----------------
|
||||
|
||||
# ------------- bold_color start----------------
|
||||
# black
|
||||
BOLD_BLACK="echo -en \\E[1;30m"
|
||||
# red
|
||||
BOLD_RED="echo -en \\E[1;31m"
|
||||
# green
|
||||
BOLD_GREEN="echo -en \\E[1;32m"
|
||||
# yellow
|
||||
BOLD_YELLOW="echo -en \\E[1;33m"
|
||||
# blue
|
||||
BOLD_BLUE="echo -en \\E[1;34m"
|
||||
# purple
|
||||
BOLD_PURPLE="echo -en \\E[1;35m"
|
||||
# cyan
|
||||
BOLD_CYAN="echo -en \\E[1;36m"
|
||||
# gray
|
||||
BOLD_GRAY="echo -en \\E[1;37m"
|
||||
# ------------- bold_color end----------------
|
||||
|
||||
# ------------- background_color start----------------
|
||||
# black
|
||||
BG_BLACK="echo -en \\E[40m"
|
||||
# red
|
||||
BG_RED="echo -en \\E[41m"
|
||||
# green
|
||||
BG_GREEN="echo -en \\E[42m"
|
||||
# yellow
|
||||
BG_YELLOW="echo -en \\E[43m"
|
||||
# blue
|
||||
BG_BLUE="echo -en \\E[44m"
|
||||
# purple
|
||||
BG_PURPLE="echo -en \\E[45m"
|
||||
# cyan
|
||||
BG_CYAN="echo -en \\E[46m"
|
||||
# gray
|
||||
BG_GRAY="echo -en \\E[47m"
|
||||
# ------------- background_color end----------------
|
||||
|
||||
# close
|
||||
RESET="echo -en \\E[0m"
|
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash
|
||||
|
||||
for var in "$@"
|
||||
do
|
||||
case $var in
|
||||
--scan_module=*)
|
||||
scan_module=$(echo $var |cut -f2 -d=)
|
||||
;;
|
||||
esac
|
||||
done
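
# Illustrative invocation; the scan_module value below is an assumption, the
# code-scan pipeline supplies the real one:
#   bash bandit.sh --scan_module=neural_compressor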
|
||||
|
||||
source /neural-compressor/.azure-pipelines/scripts/change_color.sh
|
||||
RESET="echo -en \\E[0m \\n" # close color
|
||||
|
||||
log_dir="/neural-compressor/.azure-pipelines/scripts/codeScan/scanLog"
|
||||
mkdir -p $log_dir
|
||||
|
||||
python -m bandit -r -lll -iii "/neural-compressor/${scan_module}" >$log_dir/bandit.log
|
||||
exit_code=$?
|
||||
|
||||
$BOLD_YELLOW && echo " ----------------- Current bandit cmd start --------------------------" && $RESET
|
||||
echo "python -m bandit -r -lll -iii /neural-compressor/${scan_module} > $log_dir/bandit.log"
|
||||
$BOLD_YELLOW && echo " ----------------- Current bandit cmd end --------------------------" && $RESET
|
||||
|
||||
$BOLD_YELLOW && echo " ----------------- Current log file output start --------------------------"
|
||||
cat $log_dir/bandit.log
|
||||
$BOLD_YELLOW && echo " ----------------- Current log file output end --------------------------" && $RESET
|
||||
|
||||
if [ ${exit_code} -ne 0 ]; then
|
||||
$BOLD_RED && echo "Error!! Please Click on the artifact button to download and view Bandit error details." && $RESET
|
||||
exit 1
|
||||
fi
|
||||
$BOLD_PURPLE && echo "Congratulations, Bandit check passed!" && $LIGHT_PURPLE && echo " You can click on the artifact button to see the log details." && $RESET
|
||||
exit 0
|
@@ -0,0 +1,15 @@
|
||||
activ
|
||||
ans
|
||||
assertin
|
||||
datas
|
||||
ende
|
||||
lates
|
||||
masia
|
||||
mutli
|
||||
nd
|
||||
ot
|
||||
rouge
|
||||
te
|
||||
tne
|
||||
ue
|
||||
womens
|
@@ -0,0 +1,43 @@
|
||||
#!/bin/bash
|
||||
|
||||
for var in "$@"
|
||||
do
|
||||
case $var in
|
||||
--scan_module=*)
|
||||
scan_module=$(echo $var |cut -f2 -d=)
|
||||
;;
|
||||
esac
|
||||
done
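
# Illustrative invocation; the scan_module value below is an assumption, the
# code-scan pipeline supplies the real one:
#   bash pydocstyle.sh --scan_module=neural_compressor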
|
||||
|
||||
source /neural-compressor/.azure-pipelines/scripts/change_color.sh
|
||||
RESET="echo -en \\E[0m \\n" # close color
|
||||
|
||||
work_dir="/neural-compressor/.azure-pipelines/scripts/codeScan/pydocstyle"
|
||||
log_dir="$work_dir/../scanLog"
|
||||
mkdir -p $log_dir
|
||||
|
||||
scan_path="scan_path.txt"
|
||||
|
||||
exit_code=0
|
||||
for line in $(cat ${work_dir}/${scan_path})
|
||||
do
|
||||
pydocstyle --convention=google $line >> $log_dir/pydocstyle.log
|
||||
if [ $? -ne 0 ]; then
|
||||
exit_code=1
|
||||
fi
|
||||
done
|
||||
|
||||
$BOLD_YELLOW && echo " ----------------- Current pydocstyle cmd start --------------------------" && $RESET
|
||||
echo "pydocstyle --convention=google \$line > $log_dir/pydocstyle.log"
|
||||
$BOLD_YELLOW && echo " ----------------- Current pydocstyle cmd end --------------------------" && $RESET
|
||||
|
||||
$BOLD_YELLOW && echo " ----------------- Current log file output start --------------------------"
|
||||
cat $log_dir/pydocstyle.log
|
||||
$BOLD_YELLOW && echo " ----------------- Current log file output end --------------------------" && $RESET
|
||||
|
||||
if [ ${exit_code} -ne 0 ]; then
|
||||
$BOLD_RED && echo "Error!! Please Click on the artifact button to download and view DocStyle error details." && $RESET
|
||||
exit 1
|
||||
fi
|
||||
$BOLD_PURPLE && echo "Congratulations, DocStyle check passed!" && $LIGHT_PURPLE && echo " You can click on the artifact button to see the log details." && $RESET
|
||||
exit 0
|
@@ -0,0 +1,27 @@
|
||||
/neural-compressor/neural_compressor/adaptor/mxnet_utils
|
||||
/neural-compressor/neural_compressor/adaptor/ox_utils
|
||||
/neural-compressor/neural_compressor/adaptor/tensorflow.py
|
||||
/neural-compressor/neural_compressor/adaptor/tf_utils
|
||||
/neural-compressor/neural_compressor/algorithm
|
||||
/neural-compressor/neural_compressor/benchmark.py
|
||||
/neural-compressor/neural_compressor/config.py
|
||||
/neural-compressor/neural_compressor/contrib
|
||||
/neural-compressor/neural_compressor/experimental
|
||||
/neural-compressor/neural_compressor/mix_precision.py
|
||||
/neural-compressor/neural_compressor/model
|
||||
/neural-compressor/neural_compressor/objective.py
|
||||
/neural-compressor/neural_compressor/pruner
|
||||
/neural-compressor/neural_compressor/quantization.py
|
||||
/neural-compressor/neural_compressor/strategy
|
||||
/neural-compressor/neural_compressor/training.py
|
||||
/neural-compressor/neural_compressor/utils
|
||||
/neural-compressor/neural_compressor/common
|
||||
/neural-compressor/neural_compressor/tensorflow
|
||||
/neural-compressor/neural_compressor/torch/algorithms/layer_wise
|
||||
/neural-compressor/neural_compressor/torch/algorithms/mixed_precision
|
||||
/neural-compressor/neural_compressor/torch/algorithms/mx_quant
|
||||
/neural-compressor/neural_compressor/torch/algorithms/pt2e_quant
|
||||
/neural-compressor/neural_compressor/torch/algorithms/smooth_quant
|
||||
/neural-compressor/neural_compressor/torch/algorithms/static_quant
|
||||
/neural-compressor/neural_compressor/torch/algorithms/weight_only
|
||||
/neural-compressor/neural_compressor/torch/export
|
@@ -0,0 +1,10 @@
|
||||
#!/bin/bash
|
||||
|
||||
echo "export FWs version..."
|
||||
export tensorflow_version='2.15.0-official'
|
||||
export pytorch_version='2.5.1+cpu'
|
||||
export torchvision_version='0.20.1'
|
||||
export ipex_version='2.5.0+cpu'
|
||||
export onnx_version='1.17.0'
|
||||
export onnxruntime_version='1.20.0'
|
||||
export mxnet_version='1.9.1'
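
# Illustrative usage: source this file, then read the exported versions, e.g.
#   source .azure-pipelines/scripts/fwk_version.sh
#   echo "pinned PyTorch version: ${pytorch_version}"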
|
@@ -0,0 +1,31 @@
|
||||
#!/bin/bash
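
# Illustrative invocations; the single argument selects the install flavor
# handled by the branches below:
#   bash install_nc.sh 3x_pt
#   bash install_nc.sh 3x_tf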
|
||||
|
||||
echo -e "##[group]Install Neural Compressor ... "
|
||||
cd /neural-compressor
|
||||
if [[ $1 = *"3x_pt"* ]]; then
|
||||
python -m pip install --no-cache-dir -r requirements_pt.txt
|
||||
if [[ $1 = *"3x_pt_fp8"* ]]; then
|
||||
pip uninstall neural_compressor_3x_pt -y || true
|
||||
python setup.py pt bdist_wheel
|
||||
else
|
||||
echo -e "\n Install torch CPU ... "
|
||||
pip install torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu
|
||||
python -m pip install intel-extension-for-pytorch==2.5.0 oneccl_bind_pt==2.5.0 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
|
||||
python -m pip install --no-cache-dir -r requirements.txt
|
||||
python setup.py bdist_wheel
|
||||
fi
|
||||
pip install --no-deps dist/neural_compressor*.whl --force-reinstall
|
||||
elif [[ $1 = *"3x_tf"* ]]; then
|
||||
python -m pip install --no-cache-dir -r requirements.txt
|
||||
python -m pip install --no-cache-dir -r requirements_tf.txt
|
||||
python setup.py bdist_wheel
|
||||
pip install dist/neural_compressor*.whl --force-reinstall
|
||||
else
|
||||
python -m pip install --no-cache-dir -r requirements.txt
|
||||
python setup.py bdist_wheel
|
||||
pip install dist/neural_compressor*.whl --force-reinstall
|
||||
fi
|
||||
|
||||
echo -e "\n pip list after install Neural Compressor ... "
|
||||
echo "##[endgroup]"
|
||||
pip list
|
@@ -0,0 +1,79 @@
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import requests
|
||||
|
||||
parser = argparse.ArgumentParser(allow_abbrev=False)
|
||||
parser.add_argument("--logs_dir", type=str, default=".")
|
||||
parser.add_argument("--output_dir", type=str, default=".")
|
||||
parser.add_argument("--build_id", type=str, default="0")
|
||||
args = parser.parse_args()
|
||||
print(args)
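
# Illustrative invocation, mirroring the "Collect all logs" step of the pipelines above:
#   python -u collect_log_all.py --logs_dir <logs_dir> --output_dir generated --build_id=<build_id>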
|
||||
|
||||
|
||||
def main():
|
||||
file_dir = args.logs_dir
|
||||
summary_content = ["OS;Platform;Framework;Version;Precision;Model;Mode;Type;BS;Value;Url\n"]
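# Collected summary lines are expected to follow the header above, e.g. (illustrative values):
#   linux;icx;tensorflow;2.15.0-official;FP32;resnet50v1.5;Inference;Accuracy;1;<value>;<url>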
|
||||
tuning_info_content = ["OS;Platform;Framework;Version;Model;Strategy;Tune_time\n"]
|
||||
url_dict = parse_download_url()
|
||||
# get full path of all files
|
||||
for root, dirs, files in os.walk(file_dir):
|
||||
for name in files:
|
||||
file_name = os.path.join(root, name)
|
||||
print(file_name)
|
||||
if "_summary.log" in name:
|
||||
for line in open(file_name, "r"):
|
||||
if "linux" in line:
|
||||
line = line.replace("<url>", parse_summary_log(line, url_dict))
|
||||
summary_content.append(line)
|
||||
if "_tuning_info.log" in name:
|
||||
for line in open(file_name, "r"):
|
||||
if "linux" in line:
|
||||
line = line.replace("<url>", parse_tuning_log(line, url_dict))
|
||||
tuning_info_content.append(line)
|
||||
f = open(args.output_dir + "/summary.log", "a")
|
||||
for summary in summary_content:
|
||||
f.writelines(str(summary))
|
||||
f2 = open(args.output_dir + "/tuning_info.log", "a")
|
||||
for tuning_info in tuning_info_content:
|
||||
f2.writelines(str(tuning_info))
|
||||
|
||||
|
||||
def parse_tuning_log(line, url_dict):
|
||||
"""Parsing {Framework}-{Model}-tune.log to get tuning result."""
|
||||
result = line.split(";")
|
||||
OS, Platform, Framework, Version, Model, Strategy, Tune_time, Tuning_trials, URL, __ = result
|
||||
file_name = f"{Framework}-{Model}-tune.log"
|
||||
download_url = url_dict.get(f"{Framework}_{Model}")
|
||||
download_url = f"{download_url}{file_name}"
|
||||
return download_url
|
||||
|
||||
|
||||
def parse_summary_log(line, url_dict):
|
||||
"""Parse {Framework}-{Model}-tune.log to get benchmarking accuracy result."""
|
||||
result = line.split(";")
|
||||
OS, Platform, Framework, Version, Precision, Model, Mode, Type, BS, Value, Url = result
|
||||
file_name = f"{Framework}-{Model}-tune.log"
|
||||
download_url = url_dict.get(f"{Framework}_{Model}")
|
||||
download_url = f"{download_url}{file_name}"
|
||||
return download_url
|
||||
|
||||
|
||||
def parse_download_url():
|
||||
"""Get azure artifact information."""
|
||||
azure_artifact_api_url = (
|
||||
f"https://dev.azure.com/lpot-inc/neural-compressor/_apis/build/builds/{args.build_id}/artifacts?api-version=5.1"
|
||||
)
|
||||
azure_artifacts_data = dict(requests.get(azure_artifact_api_url).json().items())
|
||||
artifact_count = azure_artifacts_data.get("count")
|
||||
artifact_value = azure_artifacts_data.get("value")
|
||||
url_dict = {}
|
||||
for item in artifact_value:
|
||||
artifact_download_url = item.get("resource").get("downloadUrl")
|
||||
artifact_download_url = f"{artifact_download_url[:-3]}file&subPath=%2F"
|
||||
url_dict[item.get("name")] = artifact_download_url
|
||||
return url_dict
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -0,0 +1,309 @@
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
|
||||
parser = argparse.ArgumentParser(allow_abbrev=False)
|
||||
parser.add_argument("--framework", type=str, required=True)
|
||||
parser.add_argument("--fwk_ver", type=str, required=True)
|
||||
parser.add_argument("--model", type=str, required=True)
|
||||
parser.add_argument("--logs_dir", type=str, default=".")
|
||||
parser.add_argument("--output_dir", type=str, default=".")
|
||||
parser.add_argument("--build_id", type=str, default="0")
|
||||
parser.add_argument("--stage", type=str, default="collect_log")
|
||||
parser.add_argument("--gap", type=float, default=0.05)
|
||||
parser.add_argument("--inc_new_api", type=str, default="")
|
||||
args = parser.parse_args()
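
# Illustrative invocation (script name and argument values are assumptions; the
# real call comes from the model test template, which is not shown in this change):
#   python <this_script>.py --framework tensorflow --fwk_ver 2.15.0-official --model resnet50v1.5 \
#       --logs_dir <dir> --output_dir <dir> --build_id <id> --stage collect_log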
|
||||
print("====== collecting model test log =======")
|
||||
OS = "linux"
|
||||
PLATFORM = "icx"
|
||||
URL = (
|
||||
"https://dev.azure.com/lpot-inc/neural-compressor/_build/results?buildId="
|
||||
+ args.build_id
|
||||
+ "&view=artifacts&pathAsName=false&type=publishedArtifacts"
|
||||
)
|
||||
OOB_MODEL_LIST = ["darknet19", "densenet-121", "resnet-101"]
|
||||
|
||||
|
||||
def get_model_tuning_dict_results():
|
||||
tuning_result_dict = {}
|
||||
|
||||
if os.path.exists(tuning_log):
|
||||
print("tuning log found")
|
||||
tmp = {"fp32_acc": 0, "int8_acc": 0, "tuning_trials": 0}
|
||||
with open(tuning_log, "r") as f:
|
||||
for line in f:
|
||||
parse_tuning_line(line, tmp)
|
||||
print(tmp)
|
||||
|
||||
tuning_result_dict = {
|
||||
"OS": OS,
|
||||
"Platform": PLATFORM,
|
||||
"Framework": args.framework,
|
||||
"Version": args.fwk_ver,
|
||||
"Model": args.model,
|
||||
"Strategy": tmp.get("strategy", "basic"),
|
||||
"Tune_time": tmp.get("tune_time"),
|
||||
}
|
||||
benchmark_accuracy_result_dict = {
|
||||
"int8": {
|
||||
"OS": OS,
|
||||
"Platform": PLATFORM,
|
||||
"Framework": args.framework,
|
||||
"Version": args.fwk_ver,
|
||||
"Model": args.model,
|
||||
"Mode": "Inference",
|
||||
"Type": "Accuracy",
|
||||
"BS": 1,
|
||||
"Value": tmp.get("int8_acc"),
|
||||
"Url": URL,
|
||||
},
|
||||
"fp32": {
|
||||
"OS": OS,
|
||||
"Platform": PLATFORM,
|
||||
"Framework": args.framework,
|
||||
"Version": args.fwk_ver,
|
||||
"Model": args.model,
|
||||
"Mode": "Inference",
|
||||
"Type": "Accuracy",
|
||||
"BS": 1,
|
||||
"Value": tmp.get("fp32_acc"),
|
||||
"Url": URL,
|
||||
},
|
||||
}
|
||||
|
||||
return tuning_result_dict, benchmark_accuracy_result_dict
|
||||
else:
|
||||
return {}, {}
|
||||
|
||||
|
||||
def get_model_benchmark_dict_results():
|
||||
benchmark_performance_result_dict = {"int8": {}, "fp32": {}}
|
||||
for precision in ["int8", "fp32"]:
|
||||
throughput = 0.0
|
||||
bs = 1
|
||||
for root, dirs, files in os.walk(args.logs_dir):
|
||||
for name in files:
|
||||
file_name = os.path.join(root, name)
|
||||
if "performance-" + precision in name:
|
||||
for line in open(file_name, "r"):
|
||||
result = parse_perf_line(line)
|
||||
if result.get("throughput"):
|
||||
throughput += result.get("throughput")
|
||||
if result.get("batch_size"):
|
||||
bs = result.get("batch_size")
|
||||
|
||||
benchmark_performance_result_dict[precision] = {
|
||||
"OS": OS,
|
||||
"Platform": PLATFORM,
|
||||
"Framework": args.framework,
|
||||
"Version": args.fwk_ver,
|
||||
"Model": args.model,
|
||||
"Mode": "Inference",
|
||||
"Type": "Performance",
|
||||
"BS": 1,
|
||||
"Value": throughput,
|
||||
"Url": URL,
|
||||
}
|
||||
|
||||
return benchmark_performance_result_dict
|
||||
|
||||
|
||||
def get_refer_data():
|
||||
refer_log = os.path.join(f"{args.logs_dir}_refer_log", f"{args.framework}_{args.model}_summary.log")
|
||||
result = {}
|
||||
if os.path.exists(refer_log):
|
||||
with open(refer_log, "r") as f:
|
||||
lines = f.readlines()
|
||||
keys = lines[0].split(";")
|
||||
values = [lines[i].split(";") for i in range(1, len(lines))]
|
||||
for value in values:
|
||||
precision = value[keys.index("Precision")]
|
||||
Type = value[keys.index("Type")]
|
||||
result[f"{precision}_{Type}"] = (
|
||||
float(value[keys.index("Value")]) if value[keys.index("Value")] != "unknown" else "unknown"
|
||||
)
|
||||
return result
|
||||
else:
|
||||
print(f"refer log file: {refer_log} not found")
|
||||
return 0
|
||||
|
||||
|
||||
def collect_log():
|
||||
results = []
|
||||
tuning_infos = []
|
||||
print(f"quantization log dir is {tuning_log}")
|
||||
# get model tuning results
|
||||
if os.path.exists(tuning_log):
|
||||
print("quantization log found")
|
||||
tmp = {"fp32_acc": 0, "int8_acc": 0, "tuning_trials": 0}
|
||||
with open(tuning_log, "r") as f:
|
||||
for line in f:
|
||||
parse_tuning_line(line, tmp)
|
||||
print(tmp)
|
||||
|
||||
# OOB models do not report accuracy
|
||||
if (args.model in OOB_MODEL_LIST) and args.framework == "tensorflow":
|
||||
tmp["fp32_acc"], tmp["int8_acc"] = "unknown", "unknown"
|
||||
|
||||
# settings for 3.x weight-only quantization (WOQ) models
|
||||
if args.inc_new_api.split("_")[0] == "3x":
|
||||
tmp["fp32_acc"], tmp["tuning_trials"], tmp["strategy"] = "unknown", "", ""
|
||||
|
||||
if "acc_bs" in tmp:
|
||||
acc_bs = tmp["acc_bs"]
|
||||
else:
|
||||
acc_bs = 1
|
||||
results.append(
|
||||
"{};{};{};{};FP32;{};Inference;Accuracy;{};{};{}\n".format(
|
||||
OS, PLATFORM, args.framework, args.fwk_ver, args.model, acc_bs, tmp["fp32_acc"], "<url>"
|
||||
)
|
||||
)
|
||||
results.append(
|
||||
"{};{};{};{};INT8;{};Inference;Accuracy;{};{};{}\n".format(
|
||||
OS, PLATFORM, args.framework, args.fwk_ver, args.model, acc_bs, tmp["int8_acc"], "<url>"
|
||||
)
|
||||
)
|
||||
tuning_infos.append(
|
||||
";".join(
|
||||
[
|
||||
OS,
|
||||
PLATFORM,
|
||||
args.framework,
|
||||
args.fwk_ver,
|
||||
args.model,
|
||||
tmp.get("strategy", "basic"),
|
||||
str(tmp["tune_time"]),
|
||||
str(tmp["tuning_trials"]),
|
||||
"<url>",
|
||||
f"{round(tmp['max_mem_size'] / tmp['total_mem_size'] * 100, 4)}%",
|
||||
]
|
||||
)
|
||||
+ "\n"
|
||||
)
|
||||
|
||||
# get model benchmark results
|
||||
if args.inc_new_api.split("_")[0] != "3x":
|
||||
for precision in ["int8", "fp32"]:
|
||||
throughput = 0.0
|
||||
bs = 1
|
||||
for root, dirs, files in os.walk(args.logs_dir):
|
||||
for name in files:
|
||||
file_name = os.path.join(root, name)
|
||||
print(file_name)
|
||||
if "performance-" + precision in name:
|
||||
for line in open(file_name, "r"):
|
||||
result = parse_perf_line(line)
|
||||
if result.get("throughput"):
|
||||
throughput += result.get("throughput")
|
||||
if result.get("batch_size"):
|
||||
bs = result.get("batch_size")
|
||||
results.append(
|
||||
"{};{};{};{};{};{};Inference;Performance;{};{};{}\n".format(
|
||||
OS, PLATFORM, args.framework, args.fwk_ver, precision.upper(), args.model, bs, throughput, URL
|
||||
)
|
||||
)
|
||||
|
||||
# write model logs
|
||||
f = open(args.output_dir + "/" + args.framework + "_" + args.model + "_summary.log", "a")
|
||||
f.writelines("OS;Platform;Framework;Version;Precision;Model;Mode;Type;BS;Value;Url\n")
|
||||
for result in results:
|
||||
f.writelines(str(result))
|
||||
f2 = open(args.output_dir + "/" + args.framework + "_" + args.model + "_tuning_info.log", "a")
|
||||
f2.writelines("OS;Platform;Framework;Version;Model;Strategy;Tune_time\n")
|
||||
for tuning_info in tuning_infos:
|
||||
f2.writelines(str(tuning_info))
|
||||
|
||||
|
||||
def parse_tuning_line(line, tmp):
|
||||
tuning_strategy = re.search(r"Tuning strategy:\s+([A-Za-z]+)", line)
|
||||
if tuning_strategy and tuning_strategy.group(1):
|
||||
tmp["strategy"] = tuning_strategy.group(1)
|
||||
|
||||
baseline_acc = re.search(
|
||||
r"FP32 baseline is:\s+\[Accuracy:\s(\d+(\.\d+)?), Duration \(seconds\):\s*(\d+(\.\d+)?)\]", line
|
||||
)
|
||||
if baseline_acc and baseline_acc.group(1):
|
||||
tmp["fp32_acc"] = float(baseline_acc.group(1))
|
||||
|
||||
tuned_acc = re.search(
|
||||
r"Best tune result is:\s+\[Accuracy:\s(\d+(\.\d+)?), Duration \(seconds\):\s(\d+(\.\d+)?)\]", line
|
||||
)
|
||||
if tuned_acc and tuned_acc.group(1):
|
||||
tmp["int8_acc"] = float(tuned_acc.group(1))
|
||||
|
||||
if args.inc_new_api.split("_")[0] == "3x":
|
||||
quant_acc = re.search(r"Accuracy:\s+(\d+(\.\d+)?)", line)
|
||||
if quant_acc and quant_acc.group(1):
|
||||
tmp["int8_acc"] = float(quant_acc.group(1))
|
||||
batch_size = re.search(r"Batch size = ([0-9]+)", line)
|
||||
if batch_size and batch_size.group(1):
|
||||
tmp["acc_bs"] = int(batch_size.group(1))
|
||||
|
||||
tune_trial = re.search(r"Tune \d*\s*result is:", line)
|
||||
if tune_trial:
|
||||
tmp["tuning_trials"] += 1
|
||||
|
||||
tune_time = re.search(r"Tuning time spend:\s+(\d+(\.\d+)?)s", line)
|
||||
if tune_time and tune_time.group(1):
|
||||
tmp["tune_time"] = int(tune_time.group(1))
|
||||
|
||||
fp32_model_size = re.search(r"The input model size is:\s+(\d+(\.\d+)?)", line)
|
||||
if fp32_model_size and fp32_model_size.group(1):
|
||||
tmp["fp32_model_size"] = int(fp32_model_size.group(1))
|
||||
|
||||
int8_model_size = re.search(r"The output model size is:\s+(\d+(\.\d+)?)", line)
|
||||
if int8_model_size and int8_model_size.group(1):
|
||||
tmp["int8_model_size"] = int(int8_model_size.group(1))
|
||||
|
||||
total_mem_size = re.search(r"Total resident size\D*([0-9]+)", line)
|
||||
if total_mem_size and total_mem_size.group(1):
|
||||
tmp["total_mem_size"] = float(total_mem_size.group(1))
|
||||
|
||||
max_mem_size = re.search(r"Maximum resident set size\D*([0-9]+)", line)
|
||||
if max_mem_size and max_mem_size.group(1):
|
||||
tmp["max_mem_size"] = float(max_mem_size.group(1))
|
||||
|
||||
|
||||
def parse_perf_line(line):
|
||||
perf_data = {}
|
||||
|
||||
throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?)", line)
|
||||
if throughput and throughput.group(1):
|
||||
perf_data.update({"throughput": float(throughput.group(1))})
|
||||
|
||||
batch_size = re.search(r"Batch size = ([0-9]+)", line)
|
||||
if batch_size and batch_size.group(1):
|
||||
perf_data.update({"batch_size": int(batch_size.group(1))})
|
||||
|
||||
return perf_data
|
||||
|
||||
|
||||
def check_status(precision, precision_upper, check_accuracy=False):
|
||||
performance_result = get_model_benchmark_dict_results()
|
||||
current_performance = performance_result.get(precision).get("Value")
|
||||
refer_performance = refer.get(f"{precision_upper}_Performance")
|
||||
print(f"current_performance_data = {current_performance:.3f}, refer_performance_data = {refer_performance:.3f}")
|
||||
assert (refer_performance - current_performance) / refer_performance <= args.gap
|
||||
|
||||
if check_accuracy:
|
||||
_, accuracy_result = get_model_tuning_dict_results()
|
||||
current_accuracy = accuracy_result.get(precision).get("Value")
|
||||
refer_accuracy = refer.get(f"{precision_upper}_Accuracy")
|
||||
print(f"current_accuracy_data = {current_accuracy:.3f}, refer_accuarcy_data = {refer_accuracy:.3f}")
|
||||
assert abs(current_accuracy - refer_accuracy) <= 0.001
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
tuning_log = os.path.join(args.logs_dir, f"{args.framework}-{args.model}-tune.log")
|
||||
refer = get_refer_data()
|
||||
|
||||
if args.stage == "collect_log":
|
||||
collect_log()
|
||||
elif args.stage == "int8_benchmark" and refer:
|
||||
check_status("int8", "INT8")
|
||||
elif args.stage == "fp32_benchmark" and refer:
|
||||
check_status("fp32", "FP32")
|
||||
elif not refer:
|
||||
print("skip check status")
|
||||
else:
|
||||
raise ValueError(f"{args.stage} does not exist")
|
@@ -0,0 +1,147 @@
|
||||
#!/bin/bash
|
||||
set -eo pipefail
|
||||
source /neural-compressor/.azure-pipelines/scripts/change_color.sh
|
||||
|
||||
# get parameters
|
||||
PATTERN='[-a-zA-Z0-9_]*='
|
||||
|
||||
for i in "$@"; do
|
||||
case $i in
|
||||
--yaml=*)
|
||||
yaml=$(echo $i | sed "s/${PATTERN}//")
|
||||
;;
|
||||
--framework=*)
|
||||
framework=$(echo $i | sed "s/${PATTERN}//")
|
||||
;;
|
||||
--fwk_ver=*)
|
||||
fwk_ver=$(echo $i | sed "s/${PATTERN}//")
|
||||
;;
|
||||
--torch_vision_ver=*)
|
||||
torch_vision_ver=$(echo $i | sed "s/${PATTERN}//")
|
||||
;;
|
||||
--model=*)
|
||||
model=$(echo $i | sed "s/${PATTERN}//")
|
||||
;;
|
||||
--model_src_dir=*)
|
||||
model_src_dir=$(echo $i | sed "s/${PATTERN}//")
|
||||
;;
|
||||
--dataset_location=*)
|
||||
dataset_location=$(echo $i | sed "s/${PATTERN}//")
|
||||
;;
|
||||
--batch_size=*)
|
||||
batch_size=$(echo $i | sed "s/${PATTERN}//")
|
||||
;;
|
||||
--strategy=*)
|
||||
strategy=$(echo $i | sed "s/${PATTERN}//")
|
||||
;;
|
||||
--new_benchmark=*)
|
||||
new_benchmark=$(echo $i | sed "s/${PATTERN}//")
|
||||
;;
|
||||
--inc_new_api=*)
|
||||
inc_new_api=$(echo $i | sed "s/${PATTERN}//")
|
||||
;;
|
||||
*)
|
||||
echo "Parameter $i not recognized."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
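
# Illustrative invocation (script name and values are assumptions; the model test
# template supplies the real ones):
#   bash <this_script>.sh --yaml=<config.yaml> --framework=tensorflow --fwk_ver=2.15.0-official \
#       --model=resnet50v1.5 --model_src_dir=<dir> --dataset_location=<path> --batch_size=1 \
#       --strategy=basic --new_benchmark=true --inc_new_api=false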
|
||||
|
||||
SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models"
|
||||
log_dir="/neural-compressor/.azure-pipelines/scripts/models"
|
||||
if [[ "${inc_new_api}" == "3x"* ]]; then
|
||||
WORK_SOURCE_DIR="/neural-compressor/examples/3.x_api/${framework}"
|
||||
git clone https://github.com/intel/intel-extension-for-transformers.git /itrex
|
||||
cd /itrex
|
||||
pip install -r requirements.txt
|
||||
pip install -v .
|
||||
else
|
||||
WORK_SOURCE_DIR="/neural-compressor/examples/${framework}"
|
||||
fi
|
||||
|
||||
$BOLD_YELLOW && echo "processing ${framework}-${fwk_ver}-${model}" && $RESET
|
||||
|
||||
$BOLD_YELLOW && echo "======= creat log_dir =========" && $RESET
|
||||
if [ -d "${log_dir}/${model}" ]; then
|
||||
$BOLD_GREEN && echo "${log_dir}/${model} already exists, don't need to mkdir." && $RESET
|
||||
else
|
||||
$BOLD_GREEN && echo "no log dir ${log_dir}/${model}, create." && $RESET
|
||||
cd ${log_dir}
|
||||
mkdir ${model}
|
||||
fi
|
||||
|
||||
$BOLD_YELLOW && echo "====== install requirements ======" && $RESET
|
||||
/bin/bash /neural-compressor/.azure-pipelines/scripts/install_nc.sh ${inc_new_api}
|
||||
|
||||
mkdir -p ${WORK_SOURCE_DIR}
|
||||
cd ${WORK_SOURCE_DIR}
|
||||
if [[ "${inc_new_api}" == "false" ]]; then
|
||||
echo "copy old api examples to workspace..."
|
||||
git clone -b old_api_examples https://github.com/intel/neural-compressor.git old-lpot-models
|
||||
cd old-lpot-models
|
||||
git branch
|
||||
cd -
|
||||
rm -rf ${model_src_dir}
|
||||
mkdir -p ${model_src_dir}
|
||||
cp -r old-lpot-models/examples/${framework}/${model_src_dir} ${WORK_SOURCE_DIR}/${model_src_dir}/../
|
||||
fi
|
||||
|
||||
cd ${model_src_dir}
|
||||
|
||||
if [[ "${fwk_ver}" != "latest" ]]; then
|
||||
pip install ruamel.yaml==0.17.40
|
||||
pip install psutil
|
||||
pip install protobuf==4.23.4
|
||||
if [[ "${framework}" == "tensorflow" ]]; then
|
||||
if [[ "${fwk_ver}" == *"-official" ]]; then
|
||||
pip install tensorflow==${fwk_ver%-official}
|
||||
else
|
||||
pip install intel-tensorflow==${fwk_ver}
|
||||
fi
|
||||
elif [[ "${framework}" == "pytorch" ]]; then
|
||||
pip install torch==${fwk_ver} --index-url https://download.pytorch.org/whl/cpu
|
||||
pip install torchvision==${torch_vision_ver} --index-url https://download.pytorch.org/whl/cpu
|
||||
elif [[ "${framework}" == "onnxrt" ]]; then
|
||||
pip install onnx==1.15.0
|
||||
pip install onnxruntime==${fwk_ver}
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -f "requirements.txt" ]; then
|
||||
sed -i '/neural-compressor/d' requirements.txt
|
||||
if [ "${framework}" == "onnxrt" ]; then
|
||||
sed -i '/^onnx>=/d;/^onnx==/d;/^onnxruntime>=/d;/^onnxruntime==/d' requirements.txt
|
||||
fi
|
||||
if [ "${framework}" == "tensorflow" ]; then
|
||||
sed -i '/tensorflow==/d;/tensorflow$/d' requirements.txt
|
||||
sed -i '/^intel-tensorflow/d' requirements.txt
|
||||
fi
|
||||
if [ "${framework}" == "pytorch" ]; then
|
||||
sed -i '/torch==/d;/torch$/d;/torchvision==/d;/torchvision$/d' requirements.txt
|
||||
fi
|
||||
n=0
|
||||
until [ "$n" -ge 5 ]; do
|
||||
python -m pip install -r requirements.txt && break
|
||||
n=$((n + 1))
|
||||
sleep 5
|
||||
done
|
||||
pip list
|
||||
else
|
||||
$BOLD_RED && echo "Not found requirements.txt file." && $RESET
|
||||
fi
|
||||
|
||||
if [[ "${inc_new_api}" == "false" ]]; then
|
||||
$BOLD_YELLOW && echo "======== update yaml config ========" && $RESET
|
||||
$BOLD_YELLOW && echo -e "\nPrint origin yaml..." && $RESET
|
||||
cat ${yaml}
|
||||
python ${SCRIPTS_PATH}/update_yaml_config.py \
|
||||
--yaml=${yaml} \
|
||||
--framework=${framework} \
|
||||
--dataset_location=${dataset_location} \
|
||||
--batch_size=${batch_size} \
|
||||
--strategy=${strategy} \
|
||||
--new_benchmark=${new_benchmark} \
|
||||
--multi_instance='true'
|
||||
$BOLD_YELLOW && echo -e "\nPrint updated yaml... " && $RESET
|
||||
cat ${yaml}
|
||||
fi
|
@@ -0,0 +1,625 @@
|
||||
#!/bin/bash
|
||||
|
||||
# WORKSPACE=.
|
||||
# summaryLog=summary.log
|
||||
# summaryLogLast=summary.log
|
||||
# tuneLog=tuning_info.log
|
||||
# tuneLogLast=tuning_info.log
|
||||
# overview_log=summary_overview.log
|
||||
# coverage_summary=coverage_summary.log
|
||||
# nc_code_lines_summary=nc_code_lines_summary.csv
|
||||
# engine_code_lines_summary=engine_code_lines_summary.csv
|
||||
|
||||
#lines_coverage_threshold=80
|
||||
#branches_coverage_threshold=75
|
||||
#
|
||||
#pass_status="<td style=\"background-color:#90EE90\">Pass</td>"
|
||||
#fail_status="<td style=\"background-color:#FFD2D2\">Fail</td>"
|
||||
#verify_status="<td style=\"background-color:#f2ea0a\">Verify</td>"
|
||||
|
||||
|
||||
# shellcheck disable=SC2120
|
||||
|
||||
while [[ $# -gt 0 ]];do
|
||||
key=${1}
|
||||
case ${key} in
|
||||
-w|--WORKSPACE)
|
||||
WORKSPACE=${2}
|
||||
shift 2
|
||||
;;
|
||||
--script_path)
|
||||
script_path=${2}
|
||||
shift 2
|
||||
;;
|
||||
--output_dir)
|
||||
output_dir=${2}
|
||||
shift 2
|
||||
;;
|
||||
--last_logt_dir)
|
||||
last_logt_dir=${2}
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
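
# Illustrative invocation, matching the "Generate report" step in the pipelines above:
#   bash generate_report.sh --WORKSPACE generated --output_dir generated --last_logt_dir last_generated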
|
||||
|
||||
echo "workspace: ${WORKSPACE}"
|
||||
echo "script_path: ${script_path}"
|
||||
|
||||
summaryLog="${WORKSPACE}/summary.log"
|
||||
tuneLog="${WORKSPACE}/tuning_info.log"
|
||||
echo "summaryLog: ${summaryLog}"
|
||||
echo "tuneLog: ${tuneLog}"
|
||||
|
||||
echo "last_logt_dir: ${last_logt_dir}"
|
||||
summaryLogLast="${last_logt_dir}/summary.log"
|
||||
tuneLogLast="${last_logt_dir}/tuning_info.log"
|
||||
echo "summaryLogLast: ${summaryLogLast}"
|
||||
echo "tuneLogLast: ${tuneLogLast}"
|
||||
ghprbPullId=${SYSTEM_PULLREQUEST_PULLREQUESTNUMBER}
|
||||
MR_source_branch=${SYSTEM_PULLREQUEST_SOURCEBRANCH}
|
||||
MR_source_repo=${SYSTEM_PULLREQUEST_SOURCEREPOSITORYURI}
|
||||
MR_target_branch=${SYSTEM_PULLREQUEST_TARGETBRANCH}
|
||||
repo_url=${BUILD_REPOSITORY_URI}
|
||||
source_commit_id=${BUILD_SOURCEVERSION}
|
||||
build_id=${BUILD_BUILDID}
|
||||
echo "MR_source_branch: ${MR_source_branch}"
|
||||
echo "MR_source_repo: ${MR_source_repo}"
|
||||
echo "MR_target_branch: ${MR_target_branch}"
|
||||
echo "repo_url: ${repo_url}"
|
||||
echo "commit_id: ${source_commit_id}"
|
||||
echo "ghprbPullId: ${ghprbPullId}"
|
||||
echo "build_id: ${build_id}"
|
||||
|
||||
|
||||
function main {
|
||||
generate_html_head
|
||||
generate_html_body
|
||||
generate_results
|
||||
generate_html_footer
|
||||
}
|
||||
|
||||
function generate_inference {
|
||||
# echo "Generating inference"
|
||||
awk -v framework="${framework}" -v fw_version="${fw_version}" -v model="${model}" -v os="${os}" -v platform=${platform} -F ';' '
|
||||
BEGIN {
|
||||
fp32_perf_bs = "nan";
|
||||
fp32_perf_value = "nan";
|
||||
fp32_perf_url = "nan";
|
||||
fp32_acc_bs = "nan";
|
||||
fp32_acc_value = "nan";
|
||||
fp32_acc_url = "nan";
|
||||
|
||||
int8_perf_bs = "nan";
|
||||
int8_perf_value = "nan";
|
||||
int8_perf_url = "nan";
|
||||
int8_acc_bs = "nan";
|
||||
int8_acc_value = "nan";
|
||||
int8_acc_url = "nan";
|
||||
}{
|
||||
if($1 == os && $2 == platform && $3 == framework && $4 == fw_version && $6 == model) {
|
||||
// FP32
|
||||
if($5 == "FP32") {
|
||||
// Performance
|
||||
if($8 == "Performance") {
|
||||
fp32_perf_bs = $9;
|
||||
fp32_perf_value = $10;
|
||||
fp32_perf_url = $11;
|
||||
}
|
||||
// Accuracy
|
||||
if($8 == "Accuracy") {
|
||||
fp32_acc_bs = $9;
|
||||
fp32_acc_value = $10;
|
||||
fp32_acc_url = $11;
|
||||
}
|
||||
}
|
||||
|
||||
// INT8
|
||||
if($5 == "INT8") {
|
||||
// Performance
|
||||
if($8 == "Performance") {
|
||||
int8_perf_bs = $9;
|
||||
int8_perf_value = $10;
|
||||
int8_perf_url = $11;
|
||||
}
|
||||
// Accuracy
|
||||
if($8 == "Accuracy") {
|
||||
int8_acc_bs = $9;
|
||||
int8_acc_value = $10;
|
||||
int8_acc_url = $11;
|
||||
}
|
||||
}
|
||||
}
|
||||
}END {
|
||||
printf("%s;%s;%s;%s;", int8_perf_bs,int8_perf_value,int8_acc_bs,int8_acc_value);
|
||||
printf("%s;%s;%s;%s;", fp32_perf_bs,fp32_perf_value,fp32_acc_bs,fp32_acc_value);
|
||||
printf("%s;%s;%s;%s;", int8_perf_url,int8_acc_url,fp32_perf_url,fp32_acc_url);
|
||||
}
|
||||
' "$1"
|
||||
}
|
||||
|
||||
function generate_html_core {
|
||||
echo "--- current values ---"
|
||||
echo ${current_values}
|
||||
echo "--- last values ---"
|
||||
echo ${last_values}
|
||||
tuning_strategy=$(grep "^${os};${platform};${framework};${fw_version};${model};" ${tuneLog} |awk -F';' '{print $6}')
|
||||
tuning_time=$(grep "^${os};${platform};${framework};${fw_version};${model};" ${tuneLog} |awk -F';' '{print $7}')
|
||||
tuning_count=$(grep "^${os};${platform};${framework};${fw_version};${model};" ${tuneLog} |awk -F';' '{print $8}')
|
||||
tuning_log=$(grep "^${os};${platform};${framework};${fw_version};${model};" ${tuneLog} |awk -F';' '{print $9}')
|
||||
echo "<tr><td rowspan=3>${platform}</td><td rowspan=3>${os}</td><td rowspan=3>${framework}</td><td rowspan=3>${fw_version}</td><td rowspan=3>${model}</td><td>New</td><td><a href=${tuning_log}>${tuning_strategy}</a></td>" >> ${output_dir}/report.html
|
||||
echo "<td><a href=${tuning_log}>${tuning_time}</a></td><td><a href=${tuning_log}>${tuning_count}</a></td>" >> ${output_dir}/report.html
|
||||
|
||||
tuning_strategy=$(grep "^${os};${platform};${framework};${fw_version};${model};" ${tuneLogLast} |awk -F';' '{print $6}')
|
||||
tuning_time=$(grep "^${os};${platform};${framework};${fw_version};${model};" ${tuneLogLast} |awk -F';' '{print $7}')
|
||||
tuning_count=$(grep "^${os};${platform};${framework};${fw_version};${model};" ${tuneLogLast} |awk -F';' '{print $8}')
|
||||
tuning_log=$(grep "^${os};${platform};${framework};${fw_version};${model};" ${tuneLogLast} |awk -F';' '{print $9}')
|
||||
|
||||
echo |awk -F ';' -v current_values="${current_values}" -v last_values="${last_values}" \
|
||||
-v tuning_strategy="${tuning_strategy}" -v tuning_time="${tuning_time}" \
|
||||
-v tuning_count="${tuning_count}" -v tuning_log="${tuning_log}" -F ';' '
|
||||
|
||||
function abs(x) { return x < 0 ? -x : x }
|
||||
|
||||
function show_new_last(batch, link, value, metric) {
|
||||
if(value ~/[1-9]/) {
|
||||
if (metric == "perf" || metric == "ratio") {
|
||||
printf("<td>%s</td> <td><a href=%s>%.2f</a></td>\n",batch,link,value);
|
||||
} else {
|
||||
printf("<td>%s</td> <td><a href=%s>%.2f%</a></td>\n",batch,link,value*100);
|
||||
}
|
||||
} else {
|
||||
if(link == "" || value == "N/A" || value == "unknown") {
|
||||
printf("<td></td> <td></td>\n");
|
||||
} else {
|
||||
printf("<td>%s</td> <td><a href=%s>Failure</a></td>\n",batch,link);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function compare_current(int8_result, fp32_result, metric) {
|
||||
|
||||
if(int8_result ~/[1-9]/ && fp32_result ~/[1-9]/) {
|
||||
if(metric == "acc") {
|
||||
target = (int8_result - fp32_result) / fp32_result;
|
||||
if(target >= -0.01) {
|
||||
printf("<td rowspan=3 style=\"background-color:#90EE90\">%.2f %</td>", target*100);
|
||||
}else if(target < -0.05) {
|
||||
printf("<td rowspan=3 style=\"background-color:#FFD2D2\">%.2f %</td>", target*100);
|
||||
job_status = "fail"
|
||||
}else{
|
||||
printf("<td rowspan=3>%.2f %</td>", target*100);
|
||||
}
|
||||
}else if(metric == "perf") {
|
||||
target = int8_result / fp32_result;
|
||||
if(target >= 1.5) {
|
||||
printf("<td style=\"background-color:#90EE90\">%.2f</td>", target);
|
||||
}else if(target < 1) {
|
||||
printf("<td style=\"background-color:#FFD2D2\">%.2f</td>", target);
|
||||
perf_status = "fail"
|
||||
}else{
|
||||
printf("<td>%.2f</td>", target);
|
||||
}
|
||||
}
|
||||
else {
|
||||
target = int8_result / fp32_result;
|
||||
if(target >= 2) {
|
||||
printf("<td rowspan=3 style=\"background-color:#90EE90\">%.2f</td>", target);
|
||||
}else if(target < 1) {
|
||||
printf("<td rowspan=3 style=\"background-color:#FFD2D2\">%.2f</td>", target);
|
||||
job_status = "fail"
|
||||
}else{
|
||||
printf("<td rowspan=3>%.2f</td>", target);
|
||||
}
|
||||
}
|
||||
}else {
|
||||
printf("<td rowspan=3></td>");
|
||||
}
|
||||
}
|
||||
|
||||
function compare_result(new_result, previous_result, metric) {
|
||||
|
||||
if (new_result ~/[1-9]/ && previous_result ~/[1-9]/) {
|
||||
if(metric == "acc") {
|
||||
target = new_result - previous_result;
|
||||
if(target > -0.00001 && target < 0.00001) {
|
||||
status_png = "background-color:#90EE90";
|
||||
} else {
|
||||
status_png = "background-color:#FFD2D2";
|
||||
job_status = "fail"
|
||||
}
|
||||
printf("<td style=\"%s\" colspan=2>%.2f %</td>", status_png, target*100);
|
||||
} else {
|
||||
target = new_result / previous_result;
|
||||
if(target <= 1.084 && target >= 0.915) {
|
||||
status_png = "background-color:#90EE90";
|
||||
} else {
|
||||
status_png = "background-color:#FFD2D2";
|
||||
perf_status = "fail"
|
||||
}
|
||||
printf("<td style=\"%s\" colspan=2>%.2f</td>", status_png, target);
|
||||
}
|
||||
} else {
|
||||
if((new_result == nan && previous_result == nan) || new_result == "unknown"){
|
||||
printf("<td class=\"col-cell col-cell3\" colspan=2></td>");
|
||||
} else{
|
||||
job_status = "fail"
|
||||
status_png = "background-color:#FFD2D2";
|
||||
printf("<td style=\"%s\" colspan=2></td>", status_png);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function compare_ratio(int8_perf_value, fp32_perf_value, last_int8_perf_value, last_fp32_perf_value) {
|
||||
if (int8_perf_value ~/[1-9]/ && fp32_perf_value ~/[1-9]/ && last_int8_perf_value ~/[1-9]/ && last_fp32_perf_value ~/[1-9]/) {
|
||||
new_result = int8_perf_value / fp32_perf_value
|
||||
previous_result = last_int8_perf_value / last_fp32_perf_value
|
||||
target = new_result / previous_result;
|
||||
if (target <= 1.084 && target >= 0.915) {
|
||||
status_png = "background-color:#90EE90";
|
||||
} else {
|
||||
status_png = "background-color:#FFD2D2";
|
||||
ratio_status = "fail"
|
||||
}
|
||||
printf("<td style=\"%s\">%.2f</td>", status_png, target);
|
||||
} else {
|
||||
if (new_result == nan && previous_result == nan) {
|
||||
printf("<td class=\"col-cell col-cell3\"></td>");
|
||||
} else {
|
||||
if (new_result == nan) {
|
||||
ratio_status = "fail"
|
||||
status_png = "background-color:#FFD2D2";
|
||||
printf("<td style=\"%s\"></td>", status_png);
|
||||
} else {
|
||||
printf("<td class=\"col-cell col-cell3\"></td>");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BEGIN {
|
||||
job_status = "pass"
|
||||
perf_status = "pass"
|
||||
ratio_status = "pass"
|
||||
// issue list
|
||||
jira_mobilenet = "https://jira01.devtools.intel.com/browse/PADDLEQ-384";
|
||||
jira_resnext = "https://jira01.devtools.intel.com/browse/PADDLEQ-387";
|
||||
jira_ssdmobilenet = "https://jira01.devtools.intel.com/browse/PADDLEQ-406";
|
||||
}{
|
||||
// Current values
|
||||
split(current_values,current_value,";");
|
||||
|
||||
// Current
|
||||
|
||||
// INT8 Performance results
|
||||
int8_perf_batch=current_value[1]
|
||||
int8_perf_value=current_value[2]
|
||||
int8_perf_url=current_value[9]
|
||||
show_new_last(int8_perf_batch, int8_perf_url, int8_perf_value, "perf");
|
||||
|
||||
// INT8 Accuracy results
|
||||
int8_acc_batch=current_value[3]
|
||||
int8_acc_value=current_value[4]
|
||||
int8_acc_url=current_value[10]
|
||||
show_new_last(int8_acc_batch, int8_acc_url, int8_acc_value, "acc");
|
||||
|
||||
// FP32 Performance results
|
||||
fp32_perf_batch=current_value[5]
|
||||
fp32_perf_value=current_value[6]
|
||||
fp32_perf_url=current_value[11]
|
||||
show_new_last(fp32_perf_batch, fp32_perf_url, fp32_perf_value, "perf");
|
||||
|
||||
// FP32 Accuracy results
|
||||
fp32_acc_batch=current_value[7]
|
||||
fp32_acc_value=current_value[8]
|
||||
fp32_acc_url=current_value[12]
|
||||
show_new_last(fp32_acc_batch, fp32_acc_url, fp32_acc_value, "acc");
|
||||
|
||||
// Compare Current
|
||||
|
||||
compare_current(int8_perf_value, fp32_perf_value, "perf");
|
||||
compare_current(int8_acc_value, fp32_acc_value, "acc");
|
||||
|
||||
// Last values
|
||||
split(last_values,last_value,";");
|
||||
|
||||
// Last
|
||||
printf("</tr>\n<tr><td>Last</td><td><a href=%4$s>%1$s</a></td><td><a href=%4$s>%2$s</a></td><td><a href=%4$s>%3$s</a></td>", tuning_strategy, tuning_time, tuning_count, tuning_log);
|
||||
|
||||
// Show last INT8 Performance results
|
||||
last_int8_perf_batch=last_value[1]
|
||||
last_int8_perf_value=last_value[2]
|
||||
last_int8_perf_url=last_value[9]
|
||||
show_new_last(last_int8_perf_batch, last_int8_perf_url, last_int8_perf_value, "perf");
|
||||
|
||||
// Show last INT8 Accuracy results
|
||||
last_int8_acc_batch=last_value[3]
|
||||
last_int8_acc_value=last_value[4]
|
||||
last_int8_acc_url=last_value[10]
|
||||
show_new_last(last_int8_acc_batch, last_int8_acc_url, last_int8_acc_value, "acc");
|
||||
|
||||
// Show last FP32 Performance results
|
||||
last_fp32_perf_batch=last_value[5]
|
||||
last_fp32_perf_value=last_value[6]
|
||||
last_fp32_perf_url=last_value[11]
|
||||
show_new_last(last_fp32_perf_batch, last_fp32_perf_url, last_fp32_perf_value, "perf");
|
||||
|
||||
// Show last FP32 Accuracy results
|
||||
last_fp32_acc_batch=last_value[7]
|
||||
last_fp32_acc_value=last_value[8]
|
||||
last_fp32_acc_url=last_value[12]
|
||||
show_new_last(last_fp32_acc_batch, last_fp32_acc_url, last_fp32_acc_value, "acc");
|
||||
|
||||
compare_current(last_int8_perf_value, last_fp32_perf_value, "perf");
|
||||
|
||||
printf("</tr>")
|
||||
|
||||
// current vs last
|
||||
printf("</tr>\n<tr><td>New/Last</td><td colspan=3 class=\"col-cell3\"></td>");
|
||||
|
||||
// Compare INT8 Performance results
|
||||
compare_result(int8_perf_value, last_int8_perf_value,"perf");
|
||||
|
||||
// Compare INT8 Accuracy results
|
||||
compare_result(int8_acc_value, last_int8_acc_value, "acc");
|
||||
|
||||
// Compare FP32 Performance results
|
||||
compare_result(fp32_perf_value, last_fp32_perf_value, "perf");
|
||||
|
||||
// Compare FP32 Accuracy results
|
||||
compare_result(fp32_acc_value, last_fp32_acc_value, "acc");
|
||||
|
||||
// Compare INT8 FP32 Performance ratio
|
||||
compare_ratio(int8_perf_value, fp32_perf_value, last_int8_perf_value, last_fp32_perf_value);
|
||||
|
||||
printf("</tr>\n");
|
||||
|
||||
status = (perf_status == "fail" && ratio_status == "fail") ? "fail" : "pass"
|
||||
status = (job_status == "fail") ? "fail" : status
|
||||
|
||||
} END{
|
||||
printf("\n%s", status);
|
||||
}
|
||||
' >> ${output_dir}/report.html
|
||||
job_state=$(tail -1 ${WORKSPACE}/report.html)
|
||||
sed -i '$s/.*//' ${WORKSPACE}/report.html
|
||||
|
||||
if [ ${job_state} == 'fail' ]; then
|
||||
echo "====== perf_reg ======"
|
||||
echo "##vso[task.setvariable variable=is_perf_reg]true"
|
||||
fi
|
||||
}
|
||||
|
||||
function generate_results {
|
||||
echo "Generating tuning results"
|
||||
oses=$(sed '1d' ${summaryLog} |cut -d';' -f1 | awk '!a[$0]++')
|
||||
echo ${oses}
|
||||
|
||||
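    # Walk the summary log hierarchy -- OS -> platform -> framework -> framework version -> model --
    # and emit one report row (current run vs. last run) per model via generate_html_core.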
for os in ${oses[@]}
|
||||
do
|
||||
platforms=$(sed '1d' ${summaryLog} |grep "^${os}" |cut -d';' -f2 | awk '!a[$0]++')
|
||||
echo ${platforms}
|
||||
for platform in ${platforms[@]}
|
||||
do
|
||||
frameworks=$(sed '1d' ${summaryLog} |grep "^${os};${platform}" |cut -d';' -f3 | awk '!a[$0]++')
|
||||
echo ${frameworks}
|
||||
for framework in ${frameworks[@]}
|
||||
do
|
||||
fw_versions=$(sed '1d' ${summaryLog} |grep "^${os};${platform};${framework}" |cut -d';' -f4 | awk '!a[$0]++')
|
||||
echo ${fw_versions}
|
||||
for fw_version in ${fw_versions[@]}
|
||||
do
|
||||
models=$(sed '1d' ${summaryLog} |grep "^${os};${platform};${framework};${fw_version}" |cut -d';' -f6 | awk '!a[$0]++')
|
||||
echo ${models}
|
||||
for model in ${models[@]}
|
||||
do
|
||||
echo "--- processing model ---"
|
||||
echo ${model}
|
||||
current_values=$(generate_inference ${summaryLog})
|
||||
echo "| current value |"
|
||||
echo ${current_values}
|
||||
last_values=$(generate_inference ${summaryLogLast})
|
||||
echo "| last value |"
|
||||
echo ${last_values}
|
||||
|
||||
generate_html_core ${current_values} ${last_values}
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
done
|
||||
}
|
||||
|
||||
function generate_html_body {
|
||||
MR_TITLE=''
|
||||
Test_Info_Title=''
|
||||
Test_Info=''
|
||||
|
||||
if [ "${qtools_branch}" == "" ];
|
||||
then
|
||||
commit_id=$(echo ${ghprbActualCommit} |awk '{print substr($1,1,7)}')
|
||||
|
||||
MR_TITLE="[ <a href='${repo_url}/pull/${ghprbPullId}'>PR-${ghprbPullId}</a> ]"
|
||||
Test_Info_Title="<th colspan="2">Source Branch</th> <th colspan="4">Target Branch</th> <th colspan="4">Commit</th> "
|
||||
Test_Info="<td colspan="2">${MR_source_branch}</td> <td colspan="4"><a href='${repo_url}/tree/${MR_target_branch}'>${MR_target_branch}</a></td> <td colspan="4"><a href='${MR_source_repo}/commit/${source_commit_id}'>${source_commit_id:0:6}</a></td>"
|
||||
else
|
||||
Test_Info_Title="<th colspan="4">Test Branch</th> <th colspan="4">Commit ID</th> "
|
||||
Test_Info="<th colspan="4">${qtools_branch}</th> <th colspan="4">${qtools_commit}</th> "
|
||||
fi
|
||||
|
||||
cat >> ${output_dir}/report.html << eof
|
||||
|
||||
<body>
|
||||
<div id="main">
|
||||
<h1 align="center">Neural Compressor Tuning Tests ${MR_TITLE}
|
||||
[ <a
|
||||
href="https://dev.azure.com/lpot-inc/neural-compressor/_build/results?buildId=${build_id}">Job-${build_id}</a>
|
||||
]</h1>
|
||||
<h1 align="center">Test Status: ${Jenkins_job_status}</h1>
|
||||
<h2>Summary</h2>
|
||||
<table class="features-table">
|
||||
<tr>
|
||||
<th>Repo</th>
|
||||
${Test_Info_Title}
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="https://github.com/intel/neural-compressor">neural-compressor</a></td>
|
||||
${Test_Info}
|
||||
</tr>
|
||||
</table>
|
||||
eof
|
||||
|
||||
|
||||
echo "Generating benchmarks table"
|
||||
cat >> ${output_dir}/report.html << eof
|
||||
<h2>Benchmark</h2>
|
||||
<table class="features-table">
|
||||
<tr>
|
||||
<th rowspan="2">Platform</th>
|
||||
<th rowspan="2">System</th>
|
||||
<th rowspan="2">Framework</th>
|
||||
<th rowspan="2">Version</th>
|
||||
<th rowspan="2">Model</th>
|
||||
<th rowspan="2">VS</th>
|
||||
<th rowspan="2">Tuning<br>Strategy</th>
|
||||
<th rowspan="2">Tuning<br>Time(s)</th>
|
||||
<th rowspan="2">Tuning<br>Count</th>
|
||||
<th colspan="4">INT8</th>
|
||||
<th colspan="4">FP32</th>
|
||||
<th colspan="2" class="col-cell col-cell1 col-cellh">Ratio</th>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
<th>bs</th>
|
||||
<th>imgs/s</th>
|
||||
<th>bs</th>
|
||||
<th>top1</th>
|
||||
|
||||
<th>bs</th>
|
||||
<th>imgs/s</th>
|
||||
<th>bs</th>
|
||||
<th>top1</th>
|
||||
|
||||
<th class="col-cell col-cell1">Throughput<br><font size="2px">INT8/FP32</font></th>
|
||||
<th class="col-cell col-cell1">Accuracy<br><font size="2px">(INT8-FP32)/FP32</font></th>
|
||||
</tr>
|
||||
eof
|
||||
}
|
||||
|
||||
function generate_html_footer {
|
||||
|
||||
cat >> ${output_dir}/report.html << eof
|
||||
<tr>
|
||||
<td colspan="17"><font color="#d6776f">Note: </font>All data tested on Azure Cloud.</td>
|
||||
<td colspan="2" class="col-cell col-cell1 col-cellf"></td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
eof
|
||||
}
|
||||
|
||||
function generate_html_head {
|
||||
|
||||
cat > ${output_dir}/report.html << eof
|
||||
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
|
||||
<title>Daily Tests - TensorFlow - Jenkins</title>
|
||||
<style type="text/css">
|
||||
body
|
||||
{
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
background: white no-repeat left top;
|
||||
}
|
||||
#main
|
||||
{
|
||||
// width: 100%;
|
||||
margin: 20px auto 10px auto;
|
||||
background: white;
|
||||
-moz-border-radius: 8px;
|
||||
-webkit-border-radius: 8px;
|
||||
padding: 0 30px 30px 30px;
|
||||
border: 1px solid #adaa9f;
|
||||
-moz-box-shadow: 0 2px 2px #9c9c9c;
|
||||
-webkit-box-shadow: 0 2px 2px #9c9c9c;
|
||||
}
|
||||
.features-table
|
||||
{
|
||||
width: 100%;
|
||||
margin: 0 auto;
|
||||
border-collapse: separate;
|
||||
border-spacing: 0;
|
||||
text-shadow: 0 1px 0 #fff;
|
||||
color: #2a2a2a;
|
||||
background: #fafafa;
|
||||
background-image: -moz-linear-gradient(top, #fff, #eaeaea, #fff); /* Firefox 3.6 */
|
||||
background-image: -webkit-gradient(linear,center bottom,center top,from(#fff),color-stop(0.5, #eaeaea),to(#fff));
|
||||
font-family: Verdana,Arial,Helvetica
|
||||
}
|
||||
.features-table th,td
|
||||
{
|
||||
text-align: center;
|
||||
height: 25px;
|
||||
line-height: 25px;
|
||||
padding: 0 8px;
|
||||
border: 1px solid #cdcdcd;
|
||||
box-shadow: 0 1px 0 white;
|
||||
-moz-box-shadow: 0 1px 0 white;
|
||||
-webkit-box-shadow: 0 1px 0 white;
|
||||
white-space: nowrap;
|
||||
}
|
||||
.no-border th
|
||||
{
|
||||
box-shadow: none;
|
||||
-moz-box-shadow: none;
|
||||
-webkit-box-shadow: none;
|
||||
}
|
||||
.col-cell
|
||||
{
|
||||
text-align: center;
|
||||
width: 150px;
|
||||
font: normal 1em Verdana, Arial, Helvetica;
|
||||
}
|
||||
.col-cell3
|
||||
{
|
||||
background: #efefef;
|
||||
background: rgba(144,144,144,0.15);
|
||||
}
|
||||
.col-cell1, .col-cell2
|
||||
{
|
||||
background: #B0C4DE;
|
||||
background: rgba(176,196,222,0.3);
|
||||
}
|
||||
.col-cellh
|
||||
{
|
||||
font: bold 1.3em 'trebuchet MS', 'Lucida Sans', Arial;
|
||||
-moz-border-radius-topright: 10px;
|
||||
-moz-border-radius-topleft: 10px;
|
||||
border-top-right-radius: 10px;
|
||||
border-top-left-radius: 10px;
|
||||
border-top: 1px solid #eaeaea !important;
|
||||
}
|
||||
.col-cellf
|
||||
{
|
||||
font: bold 1.4em Georgia;
|
||||
-moz-border-radius-bottomright: 10px;
|
||||
-moz-border-radius-bottomleft: 10px;
|
||||
border-bottom-right-radius: 10px;
|
||||
border-bottom-left-radius: 10px;
|
||||
border-bottom: 1px solid #dadada !important;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
eof
|
||||
|
||||
}
|
||||
|
||||
main
|
@@ -0,0 +1,123 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (c) 2021 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""Benchmarking: measure the model performance with the objective settings."""
|
||||
|
||||
import argparse
|
||||
import subprocess
|
||||
|
||||
import numpy as np
|
||||
|
||||
parser = argparse.ArgumentParser(allow_abbrev=False)
|
||||
parser.add_argument("--cores_per_instance", type=int, required=True)
|
||||
parser.add_argument("--num_of_instance", type=int, required=True)
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
def get_architecture():
|
||||
"""Get the architecture name of the system."""
|
||||
p1 = subprocess.Popen("lscpu", stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
p2 = subprocess.Popen(["grep", "Architecture"], stdin=p1.stdout, stdout=subprocess.PIPE)
|
||||
p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE)
|
||||
res = None
|
||||
for line in iter(p3.stdout.readline, b""):
|
||||
res = line.decode("utf-8").strip()
|
||||
return res
|
||||
|
||||
|
||||
def get_threads_per_core():
|
||||
"""Get the threads per core."""
|
||||
p1 = subprocess.Popen("lscpu", stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
p2 = subprocess.Popen(["grep", "Thread(s) per core"], stdin=p1.stdout, stdout=subprocess.PIPE)
|
||||
p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE)
|
||||
res = None
|
||||
for line in iter(p3.stdout.readline, b""):
|
||||
res = line.decode("utf-8").strip()
|
||||
return res
|
||||
|
||||
|
||||
def get_threads():
|
||||
"""Get the list of threads."""
|
||||
p1 = subprocess.Popen(["cat", "/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
p2 = subprocess.Popen(["grep", "processor"], stdin=p1.stdout, stdout=subprocess.PIPE)
|
||||
p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE)
|
||||
res = []
|
||||
for line in iter(p3.stdout.readline, b""):
|
||||
res.append(line.decode("utf-8").strip())
|
||||
return res
|
||||
|
||||
|
||||
def get_physical_ids():
|
||||
"""Get the list of sockets."""
|
||||
p1 = subprocess.Popen(["cat", "/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
p2 = subprocess.Popen(["grep", "physical id"], stdin=p1.stdout, stdout=subprocess.PIPE)
|
||||
p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE)
|
||||
res = []
|
||||
for line in iter(p3.stdout.readline, b""):
|
||||
res.append(line.decode("utf-8").strip())
|
||||
return res
|
||||
|
||||
|
||||
def get_core_ids():
|
||||
"""Get the ids list of the cores."""
|
||||
p1 = subprocess.Popen(["cat", "/proc/cpuinfo"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
||||
p2 = subprocess.Popen(["grep", "core id"], stdin=p1.stdout, stdout=subprocess.PIPE)
|
||||
p3 = subprocess.Popen(["cut", "-d", ":", "-f2"], stdin=p2.stdout, stdout=subprocess.PIPE)
|
||||
res = []
|
||||
for line in iter(p3.stdout.readline, b""):
|
||||
res.append(line.decode("utf-8").strip())
|
||||
return res
|
||||
|
||||
|
||||
def get_bounded_threads(core_ids, threads, sockets):
|
||||
"""Return the threads id list that we will bind instances to."""
|
||||
res = []
|
||||
existing_socket_core_list = []
|
||||
for idx, x in enumerate(core_ids):
|
||||
socket_core = sockets[idx] + ":" + x
|
||||
if socket_core not in existing_socket_core_list:
|
||||
res.append(int(threads[idx]))
|
||||
existing_socket_core_list.append(socket_core)
|
||||
return res
|
||||
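# get_bounded_threads keeps only the first thread listed for each (socket, core id) pair,
# so instances are later pinned to distinct physical cores rather than hyperthread siblings.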
|
||||
|
||||
def config_instance(cores_per_instance, num_of_instance):
|
||||
"""Configure the multi-instance commands and trigger benchmark with sub process."""
|
||||
core = []
|
||||
|
||||
    if get_architecture() == "aarch64" and int(get_threads_per_core()) > 1:
        raise OSError("Currently no support on ARM (aarch64) with hyperthreading enabled")
    else:
        bounded_threads = get_bounded_threads(get_core_ids(), get_threads(), get_physical_ids())
|
||||
|
||||
for i in range(0, num_of_instance):
|
||||
if get_architecture() == "x86_64":
|
||||
core_list_idx = np.arange(0, cores_per_instance) + i * cores_per_instance
|
||||
core_list = np.array(bounded_threads)[core_list_idx]
|
||||
else:
|
||||
core_list = np.arange(0, cores_per_instance) + i * cores_per_instance
|
||||
core.append(core_list.tolist())
|
||||
|
||||
for i in range(len(core)):
|
||||
core[i] = [str(j) for j in core[i]]
|
||||
core[i] = ",".join(core[i])
|
||||
|
||||
core = ";".join(core)
|
||||
return core
|
||||
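# A minimal usage sketch (hypothetical values): on an x86_64 host with 8 physical cores,
# config_instance(cores_per_instance=4, num_of_instance=2) would return a string like
# "0,1,2,3;4,5,6,7" -- one comma-separated core list per instance, instances joined by ";".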
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(config_instance(args.cores_per_instance, args.num_of_instance))
|
@@ -0,0 +1,140 @@
|
||||
#!/bin/bash
|
||||
set -eo pipefail
|
||||
source /neural-compressor/.azure-pipelines/scripts/change_color.sh
|
||||
|
||||
# get parameters
|
||||
PATTERN='[-a-zA-Z0-9_]*='
|
||||
SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models"
|
||||
|
||||
for i in "$@"; do
|
||||
case $i in
|
||||
--framework=*)
|
||||
framework=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--model=*)
|
||||
model=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--input_model=*)
|
||||
input_model=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--benchmark_cmd=*)
|
||||
benchmark_cmd=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--log_dir=*)
|
||||
log_dir=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--new_benchmark=*)
|
||||
new_benchmark=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--precision=*)
|
||||
precision=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--stage=*)
|
||||
stage=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--USE_TUNE_ACC=*)
|
||||
USE_TUNE_ACC=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--PERF_STABLE_CHECK=*)
|
||||
PERF_STABLE_CHECK=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--BUILD_BUILDID=*)
|
||||
BUILD_BUILDID=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
*)
|
||||
echo "Parameter $i not recognized."; exit 1;;
|
||||
esac
|
||||
done
|
||||
|
||||
$BOLD_YELLOW && echo "-------- run_benchmark_common --------" && $RESET
|
||||
|
||||
main() {
|
||||
# run accuracy
|
||||
echo "USE_TUNE_ACC=${USE_TUNE_ACC}, PERF_STABLE_CHECK=${PERF_STABLE_CHECK}"
|
||||
# USE_TUNE_ACC==true means using accuracy results from tuning log
|
||||
if [ ${USE_TUNE_ACC} == "false" ]; then
|
||||
run_accuracy
|
||||
fi
|
||||
|
||||
# run performance
|
||||
if [ ${PERF_STABLE_CHECK} == "false" ]; then
|
||||
run_performance
|
||||
else
|
||||
max_loop=3
|
||||
gap=(0.05 0.05 0.1)
|
||||
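        # Performance is re-run up to ${max_loop} times; after each run check_perf_gap
        # (collect_log_model.py with --gap=<threshold>) validates the new result, and the
        # loop stops on success, with allowed gaps of 5%, 5%, then 10% per attempt.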
for ((iter = 0; iter < ${max_loop}; iter++)); do
|
||||
run_performance
|
||||
{
|
||||
check_perf_gap ${gap[${iter}]}
|
||||
exit_code=$?
|
||||
} || true
|
||||
|
||||
if [ ${exit_code} -ne 0 ]; then
|
||||
$BOLD_RED && echo "FAILED with performance gap!!" && $RESET
|
||||
else
|
||||
$BOLD_GREEN && echo "SUCCEED!!" && $RESET
|
||||
break
|
||||
fi
|
||||
done
|
||||
exit ${exit_code}
|
||||
fi
|
||||
}
|
||||
|
||||
function check_perf_gap() {
|
||||
python -u ${SCRIPTS_PATH}/collect_log_model.py \
|
||||
--framework=${framework} \
|
||||
--fwk_ver=${fwk_ver} \
|
||||
--model=${model} \
|
||||
--logs_dir="${log_dir}" \
|
||||
--output_dir="${log_dir}" \
|
||||
--build_id=${BUILD_BUILDID} \
|
||||
--stage=${stage} \
|
||||
--gap=$1
|
||||
}
|
||||
|
||||
function run_performance() {
|
||||
cmd="${benchmark_cmd} --input_model=${input_model}"
|
||||
if [ "${new_benchmark}" == "true" ]; then
|
||||
$BOLD_YELLOW && echo "run with internal benchmark..." && $RESET
|
||||
export NUM_OF_INSTANCE=2
|
||||
export CORES_PER_INSTANCE=4
|
||||
eval ${cmd} 2>&1 | tee ${log_dir}/${framework}-${model}-performance-${precision}.log
|
||||
else
|
||||
$BOLD_YELLOW && echo "run with external multiInstance benchmark..." && $RESET
|
||||
multiInstance
|
||||
fi
|
||||
}
|
||||
|
||||
function run_accuracy() {
|
||||
$BOLD_YELLOW && echo "run tuning accuracy in precision ${precision}" && $RESET
|
||||
eval "${benchmark_cmd} --input_model=${input_model} --mode=accuracy" 2>&1 | tee ${log_dir}/${framework}-${model}-accuracy-${precision}.log
|
||||
}
|
||||
|
||||
function multiInstance() {
|
||||
ncores_per_socket=${ncores_per_socket:=$(lscpu | grep 'Core(s) per socket' | cut -d: -f2 | xargs echo -n)}
|
||||
$BOLD_YELLOW && echo "Executing multi instance benchmark" && $RESET
|
||||
ncores_per_instance=4
|
||||
$BOLD_YELLOW && echo "ncores_per_socket=${ncores_per_socket}, ncores_per_instance=${ncores_per_instance}" && $RESET
|
||||
|
||||
logFile="${log_dir}/${framework}-${model}-performance-${precision}"
|
||||
benchmark_pids=()
|
||||
|
||||
core_list=$(python ${SCRIPTS_PATH}/new_benchmark.py --cores_per_instance=${ncores_per_instance} --num_of_instance=$(expr $ncores_per_socket / $ncores_per_instance))
|
||||
core_list=($(echo $core_list | tr ';' ' '))
|
||||
|
||||
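    # Launch one background benchmark per ${ncores_per_instance}-core slice of the socket,
    # pinned with numactl --physcpubind to the core list computed by new_benchmark.py.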
for ((j = 0; $j < $(expr $ncores_per_socket / $ncores_per_instance); j = $(($j + 1)))); do
|
||||
$BOLD_GREEN && echo "OMP_NUM_THREADS=${ncores_per_instance} numactl --localalloc --physcpubind=${core_list[${j}]} ${cmd} 2>&1 | tee ${logFile}-${ncores_per_socket}-${ncores_per_instance}-${j}.log &" && $RESET
|
||||
OMP_NUM_THREADS=${ncores_per_instance} numactl --localalloc --physcpubind=${core_list[${j}]} ${cmd} 2>&1 | tee ${logFile}-${ncores_per_socket}-${ncores_per_instance}-${j}.log &
|
||||
benchmark_pids+=($!)
|
||||
done
|
||||
|
||||
status="SUCCESS"
|
||||
for pid in "${benchmark_pids[@]}"; do
|
||||
wait $pid
|
||||
exit_code=$?
|
||||
$BOLD_YELLOW && echo "Detected exit code: ${exit_code}" && $RESET
|
||||
if [ ${exit_code} == 0 ]; then
|
||||
$BOLD_GREEN && echo "Process ${pid} succeeded" && $RESET
|
||||
else
|
||||
$BOLD_RED && echo "Process ${pid} failed" && $RESET
|
||||
status="FAILURE"
|
||||
fi
|
||||
done
|
||||
|
||||
$BOLD_YELLOW && echo "Benchmark process status: ${status}" && $RESET
|
||||
if [ ${status} == "FAILURE" ]; then
|
||||
$BOLD_RED && echo "Benchmark process returned non-zero exit code." && $RESET
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
main
|
@@ -0,0 +1,177 @@
|
||||
#!/bin/bash
|
||||
set -eo pipefail
|
||||
source /neural-compressor/.azure-pipelines/scripts/change_color.sh
|
||||
# get parameters
|
||||
PATTERN='[-a-zA-Z0-9_]*='
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
case $i in
|
||||
--yaml=*)
|
||||
yaml=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--framework=*)
|
||||
framework=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--fwk_ver=*)
|
||||
fwk_ver=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--torch_vision_ver=*)
|
||||
torch_vision_ver=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--model=*)
|
||||
model=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--model_src_dir=*)
|
||||
model_src_dir=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--dataset_location=*)
|
||||
dataset_location=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--input_model=*)
|
||||
input_model=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--batch_size=*)
|
||||
batch_size=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--strategy=*)
|
||||
strategy=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--new_benchmark=*)
|
||||
new_benchmark=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--inc_new_api=*)
|
||||
inc_new_api=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--tuning_cmd=*)
|
||||
tuning_cmd=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--benchmark_cmd=*)
|
||||
benchmark_cmd=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--mode=*)
|
||||
mode=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--USE_TUNE_ACC=*)
|
||||
USE_TUNE_ACC=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--PERF_STABLE_CHECK=*)
|
||||
PERF_STABLE_CHECK=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--BUILD_BUILDID=*)
|
||||
BUILD_BUILDID=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
*)
|
||||
echo "Parameter $i not recognized."; exit 1;;
|
||||
esac
|
||||
done
|
||||
|
||||
function check_results() {
|
||||
local control_phrase=$1
|
||||
if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | wc -l) == 0 ];then
|
||||
$BOLD_RED && echo "====== Quantization FAILED!! ======" && $RESET; exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
log_dir="/neural-compressor/.azure-pipelines/scripts/models"
|
||||
SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models"
|
||||
if [[ "${inc_new_api}" == "3x"* ]]; then
|
||||
WORK_SOURCE_DIR="/neural-compressor/examples/3.x_api/${framework}"
|
||||
else
|
||||
WORK_SOURCE_DIR="/neural-compressor/examples/${framework}"
|
||||
fi
|
||||
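# This script is called once per stage, selected via --mode:
# env_setup -> tuning -> int8_benchmark / fp32_benchmark -> collect_log.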
$BOLD_YELLOW && echo "processing ${framework}-${fwk_ver}-${model}" && $RESET
|
||||
|
||||
if [ "${mode}" == "env_setup" ]; then
|
||||
/bin/bash env_setup.sh \
|
||||
--yaml=${yaml} \
|
||||
--framework=${framework} \
|
||||
--fwk_ver=${fwk_ver} \
|
||||
--torch_vision_ver=${torch_vision_ver} \
|
||||
--model=${model} \
|
||||
--model_src_dir=${model_src_dir} \
|
||||
--dataset_location=${dataset_location} \
|
||||
--batch_size=${batch_size} \
|
||||
--strategy=${strategy} \
|
||||
--new_benchmark=${new_benchmark} \
|
||||
--inc_new_api="${inc_new_api}"
|
||||
elif [ "${mode}" == "tuning" ]; then
|
||||
if [ "${framework}" == "onnxrt" ]; then
|
||||
output_model=${log_dir}/${model}/${framework}-${model}-tune.onnx
|
||||
elif [ "${framework}" == "tensorflow" ]; then
|
||||
output_model=${log_dir}/${model}/${framework}-${model}-tune.pb
|
||||
fi
|
||||
[[ ${output_model} ]] && tuning_cmd="${tuning_cmd} --output_model=${output_model}"
|
||||
|
||||
cd ${WORK_SOURCE_DIR}/${model_src_dir}
|
||||
# for int4 models add "--accuracy" to run tuning after quantize
|
||||
if [[ "${model}" == *"int4"* ]]; then
|
||||
sed -i "s|--quantize|--quantize --accuracy --load|g" run_quant.sh
|
||||
fi
|
||||
|
||||
$BOLD_YELLOW && echo "workspace ${WORK_SOURCE_DIR}/${model_src_dir}" && $RESET
|
||||
$BOLD_YELLOW && echo "tuning_cmd is === ${tuning_cmd}" && $RESET
|
||||
$BOLD_YELLOW && echo "======== run tuning ========" && $RESET
|
||||
/bin/bash ${SCRIPTS_PATH}/run_tuning_common.sh \
|
||||
--tuning_cmd="${tuning_cmd}" \
|
||||
--strategy=${strategy} \
|
||||
2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log
|
||||
$BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET
|
||||
if [[ "${inc_new_api}" == "3x"* ]]; then
|
||||
control_phrase_1="Preparation end."
|
||||
check_results $control_phrase_1
|
||||
control_phrase_2="Conversion end."
|
||||
check_results $control_phrase_2
|
||||
else
|
||||
control_phrase="model which meet accuracy goal."
|
||||
check_results $control_phrase
|
||||
if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then
|
||||
$BOLD_RED && echo "====== Quantization FAILED!! ======" && $RESET; exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
$BOLD_GREEN && echo "====== Quantization SUCCEED!! ======" && $RESET
|
||||
elif [ "${mode}" == "fp32_benchmark" ]; then
|
||||
cd ${WORK_SOURCE_DIR}/${model_src_dir}
|
||||
$BOLD_YELLOW && echo "workspace ${WORK_SOURCE_DIR}/${model_src_dir}" && $RESET
|
||||
$BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET
|
||||
$BOLD_YELLOW && echo "====== run benchmark fp32 =======" && $RESET
|
||||
/bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \
|
||||
--framework=${framework} \
|
||||
--model=${model} \
|
||||
--input_model=${input_model} \
|
||||
--benchmark_cmd="${benchmark_cmd}" \
|
||||
--log_dir="${log_dir}/${model}" \
|
||||
--new_benchmark=${new_benchmark} \
|
||||
--precision="fp32" \
|
||||
--stage=${mode} \
|
||||
--USE_TUNE_ACC=${USE_TUNE_ACC} \
|
||||
--PERF_STABLE_CHECK=${PERF_STABLE_CHECK} \
|
||||
--BUILD_BUILDID=${BUILD_BUILDID}
|
||||
elif [ "${mode}" == "int8_benchmark" ]; then
|
||||
cd ${WORK_SOURCE_DIR}/${model_src_dir}
|
||||
$BOLD_YELLOW && echo "workspace ${WORK_SOURCE_DIR}/${model_src_dir}" && $RESET
|
||||
$BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET
|
||||
$BOLD_YELLOW && echo "====== run benchmark int8 =======" && $RESET
|
||||
if [[ "${framework}" == "onnxrt" ]]; then
|
||||
model_name="${log_dir}/${model}/${framework}-${model}-tune.onnx"
|
||||
elif [[ "${framework}" == "tensorflow" ]]; then
|
||||
model_name="${log_dir}/${model}/${framework}-${model}-tune.pb"
|
||||
elif [[ "${framework}" == "pytorch" ]]; then
|
||||
model_name=${input_model}
|
||||
benchmark_cmd="${benchmark_cmd} --int8=true"
|
||||
fi
|
||||
/bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \
|
||||
--framework=${framework} \
|
||||
--model=${model} \
|
||||
--input_model="${model_name}" \
|
||||
--benchmark_cmd="${benchmark_cmd}" \
|
||||
--log_dir="${log_dir}/${model}" \
|
||||
--new_benchmark=${new_benchmark} \
|
||||
--precision="int8" \
|
||||
--stage=${mode} \
|
||||
--USE_TUNE_ACC=${USE_TUNE_ACC} \
|
||||
--PERF_STABLE_CHECK=${PERF_STABLE_CHECK} \
|
||||
--BUILD_BUILDID=${BUILD_BUILDID}
|
||||
elif [ "${mode}" == "collect_log" ]; then
|
||||
cd ${WORK_SOURCE_DIR}/${model_src_dir}
|
||||
$BOLD_YELLOW && echo "workspace ${WORK_SOURCE_DIR}/${model_src_dir}" && $RESET
|
||||
$BOLD_YELLOW && echo "====== collect logs of model ${model} =======" && $RESET
|
||||
if [ "${framework}" == "pytorch" ] && [ "${fwk_ver}" == "latest" ]; then
|
||||
fwk_ver=$(python -c "import torch; print(torch.__version__)")
|
||||
fi
|
||||
|
||||
python -u ${SCRIPTS_PATH}/collect_log_model.py \
|
||||
--framework=${framework} \
|
||||
--fwk_ver=${fwk_ver} \
|
||||
--model=${model} \
|
||||
--logs_dir="${log_dir}/${model}" \
|
||||
--output_dir="${log_dir}/${model}" \
|
||||
--build_id=${BUILD_BUILDID} \
|
||||
--stage=${mode} \
|
||||
--inc_new_api="${inc_new_api}"
|
||||
$BOLD_YELLOW && echo "====== Finish collect logs =======" && $RESET
|
||||
fi
|
@@ -0,0 +1,62 @@
|
||||
#!/bin/bash
|
||||
set -eo pipefail
|
||||
# get parameters
|
||||
PATTERN='[-a-zA-Z0-9_]*='
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
case $i in
|
||||
--model=*)
|
||||
model=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--mode=*)
|
||||
mode=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--USE_TUNE_ACC=*)
|
||||
USE_TUNE_ACC=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--PERF_STABLE_CHECK=*)
|
||||
PERF_STABLE_CHECK=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--BUILD_BUILDID=*)
|
||||
BUILD_BUILDID=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
*)
|
||||
echo "Parameter $i not recognized."; exit 1;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "specify FWs version..."
|
||||
source /neural-compressor/.azure-pipelines/scripts/fwk_version.sh 'latest'
|
||||
FRAMEWORK="onnxrt"
|
||||
FRAMEWORK_VERSION=${onnxruntime_version}
|
||||
|
||||
inc_new_api=false
|
||||
# ======== set up config for onnxrt models ========
|
||||
if [ "${model}" == "resnet50-v1-12" ]; then
|
||||
model_src_dir="image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static"
|
||||
dataset_location="/tf_dataset2/datasets/imagenet/ImagenetRaw/ImagenetRaw_small_5000/ILSVRC2012_img_val"
|
||||
input_model="/tf_dataset2/models/onnx/resnet50-v1-12/resnet50-v1-12.onnx"
|
||||
yaml="resnet50_v1_5.yaml"
|
||||
strategy="basic"
|
||||
batch_size=1
|
||||
new_benchmark=true
|
||||
inc_new_api=true
|
||||
tuning_cmd="bash run_quant.sh --input_model=${input_model} --dataset_location=${dataset_location}"
|
||||
benchmark_cmd="bash run_benchmark.sh --config=${yaml} --mode=performance --dataset_location=${dataset_location}"
|
||||
fi
|
||||
|
||||
|
||||
/bin/bash run_model_trigger_common.sh \
|
||||
--yaml=${yaml} \
|
||||
--framework=${FRAMEWORK} \
|
||||
--fwk_ver=${FRAMEWORK_VERSION} \
|
||||
--model=${model} \
|
||||
--model_src_dir=${model_src_dir} \
|
||||
--dataset_location=${dataset_location} \
|
||||
--input_model=${input_model} \
|
||||
--batch_size=${batch_size} \
|
||||
--strategy=${strategy} \
|
||||
--new_benchmark=${new_benchmark} \
|
||||
--tuning_cmd="${tuning_cmd}" \
|
||||
--benchmark_cmd="${benchmark_cmd}" \
|
||||
--inc_new_api="${inc_new_api}" \
|
||||
--mode=${mode} \
|
||||
--USE_TUNE_ACC=${USE_TUNE_ACC} \
|
||||
--PERF_STABLE_CHECK=${PERF_STABLE_CHECK} \
|
||||
--BUILD_BUILDID=${BUILD_BUILDID}
|
@@ -0,0 +1,100 @@
|
||||
#!/bin/bash
|
||||
set -eo pipefail
|
||||
# get parameters
|
||||
PATTERN='[-a-zA-Z0-9_]*='
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
case $i in
|
||||
--model=*)
|
||||
model=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--mode=*)
|
||||
mode=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--USE_TUNE_ACC=*)
|
||||
USE_TUNE_ACC=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--PERF_STABLE_CHECK=*)
|
||||
PERF_STABLE_CHECK=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--BUILD_BUILDID=*)
|
||||
BUILD_BUILDID=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
*)
|
||||
echo "Parameter $i not recognized."; exit 1;;
|
||||
esac
|
||||
done
|
||||
|
||||
dataset_location=""
|
||||
input_model=""
|
||||
yaml=""
|
||||
strategy=""
|
||||
batch_size=""
|
||||
new_benchmark=true
|
||||
inc_new_api=true
|
||||
benchmark_cmd=""
|
||||
# ======== set up config for pytorch models ========
|
||||
if [ "${model}" == "resnet18" ]; then
|
||||
model_src_dir="image_recognition/torchvision_models/quantization/ptq/cpu/eager"
|
||||
dataset_location="/tf_dataset2/datasets/mini-imageraw"
|
||||
input_model=""
|
||||
yaml="conf.yaml"
|
||||
strategy="bayesian"
|
||||
batch_size=1
|
||||
new_benchmark=false
|
||||
inc_new_api=false
|
||||
tuning_cmd="bash run_tuning.sh --topology=resnet18 --dataset_location=${dataset_location} --input_model=${input_model}"
|
||||
benchmark_cmd="bash run_benchmark.sh --topology=resnet18 --dataset_location=${dataset_location} --mode=benchmark --batch_size=${batch_size} --iters=500"
|
||||
elif [ "${model}" == "resnet18_fx" ]; then
|
||||
model_src_dir="image_recognition/torchvision_models/quantization/ptq/cpu/fx/"
|
||||
dataset_location="/tf_dataset2/datasets/mini-imageraw"
|
||||
input_model="resnet18"
|
||||
yaml=""
|
||||
strategy="basic"
|
||||
batch_size=1
|
||||
new_benchmark=true
|
||||
inc_new_api=true
|
||||
tuning_cmd="bash run_quant.sh --topology=resnet18 --dataset_location=${dataset_location} --input_model=${input_model}"
|
||||
benchmark_cmd="bash run_benchmark.sh --topology=resnet18 --dataset_location=${dataset_location} --mode=performance --batch_size=${batch_size} --iters=500"
|
||||
elif [ "${model}" == "opt_125m_woq_gptq_int4" ]; then
|
||||
model_src_dir="nlp/huggingface_models/language-modeling/quantization/weight_only"
|
||||
inc_new_api=3x_pt
|
||||
tuning_cmd="bash run_quant.sh --topology=opt_125m_woq_gptq_int4"
|
||||
elif [ "${model}" == "opt_125m_woq_gptq_nf4_dq_bnb" ]; then
|
||||
model_src_dir="nlp/huggingface_models/language-modeling/quantization/weight_only"
|
||||
inc_new_api=3x_pt
|
||||
tuning_cmd="bash run_quant.sh --topology=opt_125m_woq_gptq_nf4_dq_bnb"
|
||||
elif [ "${model}" == "opt_125m_woq_gptq_int4_dq_ggml" ]; then
|
||||
model_src_dir="nlp/huggingface_models/language-modeling/quantization/weight_only"
|
||||
inc_new_api=3x_pt
|
||||
tuning_cmd="bash run_quant.sh --topology=opt_125m_woq_gptq_int4_dq_ggml"
|
||||
fi
|
||||
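# Note: an inc_new_api value starting with "3x" makes run_model_trigger_common.sh use the
# examples/3.x_api/<framework> tree and pins FRAMEWORK_VERSION to "latest" below.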
|
||||
echo "Specify FWs version..."
|
||||
|
||||
FRAMEWORK="pytorch"
|
||||
source /neural-compressor/.azure-pipelines/scripts/fwk_version.sh 'latest'
|
||||
if [[ "${inc_new_api}" == "3x"* ]]; then
|
||||
FRAMEWORK_VERSION="latest"
|
||||
export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
|
||||
else
|
||||
FRAMEWORK_VERSION=${pytorch_version}
|
||||
TORCH_VISION_VERSION=${torchvision_version}
|
||||
fi
|
||||
|
||||
|
||||
/bin/bash run_model_trigger_common.sh \
|
||||
--yaml=${yaml} \
|
||||
--framework=${FRAMEWORK} \
|
||||
--fwk_ver=${FRAMEWORK_VERSION} \
|
||||
--torch_vision_ver=${TORCH_VISION_VERSION} \
|
||||
--model=${model} \
|
||||
--model_src_dir=${model_src_dir} \
|
||||
--dataset_location=${dataset_location} \
|
||||
--input_model=${input_model} \
|
||||
--batch_size=${batch_size} \
|
||||
--strategy=${strategy} \
|
||||
--new_benchmark=${new_benchmark} \
|
||||
--tuning_cmd="${tuning_cmd}" \
|
||||
--benchmark_cmd="${benchmark_cmd}" \
|
||||
--inc_new_api="${inc_new_api}" \
|
||||
--mode=${mode} \
|
||||
--USE_TUNE_ACC=${USE_TUNE_ACC} \
|
||||
--PERF_STABLE_CHECK=${PERF_STABLE_CHECK} \
|
||||
--BUILD_BUILDID=${BUILD_BUILDID}
|
@@ -0,0 +1,118 @@
|
||||
#!/bin/bash
|
||||
set -eo pipefail
|
||||
# get parameters
|
||||
PATTERN='[-a-zA-Z0-9_]*='
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
case $i in
|
||||
--model=*)
|
||||
model=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--mode=*)
|
||||
mode=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--USE_TUNE_ACC=*)
|
||||
USE_TUNE_ACC=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--PERF_STABLE_CHECK=*)
|
||||
PERF_STABLE_CHECK=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
--BUILD_BUILDID=*)
|
||||
BUILD_BUILDID=`echo $i | sed "s/${PATTERN}//"`;;
|
||||
*)
|
||||
echo "Parameter $i not recognized."; exit 1;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "specify FWs version..."
|
||||
source /neural-compressor/.azure-pipelines/scripts/fwk_version.sh 'latest'
|
||||
FRAMEWORK="tensorflow"
|
||||
FRAMEWORK_VERSION=${tensorflow_version}
|
||||
|
||||
inc_new_api=false
|
||||
# ======== set up config for tensorflow models ========
|
||||
if [ "${model}" == "resnet50v1.5" ]; then
|
||||
model_src_dir="image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq"
|
||||
dataset_location="/tf_dataset/dataset/TF_mini_imagenet"
|
||||
input_model="/tf_dataset/pre-trained-models/resnet50v1_5/fp32/resnet50_v1.pb"
|
||||
new_benchmark=true
|
||||
inc_new_api=true
|
||||
tuning_cmd="bash run_quant.sh --dataset_location=${dataset_location} --input_model=${input_model}"
|
||||
benchmark_cmd="bash run_benchmark.sh --dataset_location=${dataset_location} --batch_size=1 --mode=performance"
|
||||
elif [ "${model}" == "ssd_resnet50_v1" ];then
|
||||
model_src_dir="object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq"
|
||||
dataset_location="/tf_dataset/tensorflow/mini-coco-100.record"
|
||||
input_model="/tf_dataset/pre-train-model-oob/object_detection/ssd_resnet50_v1/frozen_inference_graph.pb"
|
||||
new_benchmark=true
|
||||
inc_new_api=true
|
||||
tuning_cmd="bash run_quant.sh --dataset_location=${dataset_location} --input_model=${input_model}"
|
||||
benchmark_cmd="bash run_benchmark.sh --dataset_location=${dataset_location} --batch_size=1 --mode=performance"
|
||||
elif [ "${model}" == "ssd_mobilenet_v1_ckpt" ];then
|
||||
model_src_dir="object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq"
|
||||
dataset_location="/tf_dataset/tensorflow/mini-coco-100.record"
|
||||
input_model="/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1"
|
||||
new_benchmark=true
|
||||
inc_new_api=true
|
||||
tuning_cmd="bash run_quant.sh --dataset_location=${dataset_location} --input_model=${input_model}"
|
||||
benchmark_cmd="bash run_benchmark.sh --dataset_location=${dataset_location} --batch_size=1 --mode=performance"
|
||||
elif [ "${model}" == "inception_v1" ]; then
|
||||
model_src_dir="image_recognition/tensorflow_models/quantization/ptq"
|
||||
dataset_location="/tf_dataset/dataset/TF_mini_imagenet"
|
||||
input_model="/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_inception_v1.pb"
|
||||
yaml="inception_v1.yaml"
|
||||
strategy="basic"
|
||||
batch_size=1
|
||||
new_benchmark=true
|
||||
tuning_cmd="bash run_tuning.sh --config=${yaml} --input_model=${input_model}"
|
||||
benchmark_cmd="bash run_benchmark.sh --config=${yaml} --mode=performance"
|
||||
elif [ "${model}" == "darknet19" ]; then
|
||||
model_src_dir="oob_models/quantization/ptq"
|
||||
dataset_location=""
|
||||
input_model="/tf_dataset/tensorflow/tf_oob_models/ov/all_tf_models/PublicInHouse/classification/darknet19/darknet19.pb"
|
||||
yaml="config.yaml"
|
||||
strategy="basic"
|
||||
batch_size=1
|
||||
new_benchmark=false
|
||||
inc_new_api=true
|
||||
tuning_cmd="bash run_quant.sh --topology=${model} --input_model=${input_model}"
|
||||
benchmark_cmd="bash run_benchmark.sh --topology=${model} --mode=performance --batch_size=1 --iters=500"
|
||||
elif [ "${model}" == "densenet-121" ]; then
|
||||
model_src_dir="oob_models/quantization/ptq"
|
||||
dataset_location=""
|
||||
input_model="/tf_dataset/tensorflow/tf_oob_models/ov/all_tf_models/classification/densenet/121/tf/densenet-121.pb"
|
||||
yaml="config.yaml"
|
||||
strategy="basic"
|
||||
batch_size=1
|
||||
new_benchmark=false
|
||||
inc_new_api=true
|
||||
tuning_cmd="bash run_quant.sh --topology=${model} --input_model=${input_model}"
|
||||
benchmark_cmd="bash run_benchmark.sh --topology=${model} --mode=performance --batch_size=1 --iters=500"
|
||||
elif [ "${model}" == "resnet-101" ]; then
|
||||
model_src_dir="oob_models/quantization/ptq"
|
||||
dataset_location=""
|
||||
input_model="/tf_dataset/tensorflow/tf_oob_models/ov/all_tf_models/classification/resnet/v1/101/tf/resnet-101.pb"
|
||||
yaml="config.yaml"
|
||||
strategy="basic"
|
||||
batch_size=1
|
||||
new_benchmark=false
|
||||
inc_new_api=true
|
||||
tuning_cmd="bash run_quant.sh --topology=${model} --input_model=${input_model}"
|
||||
benchmark_cmd="bash run_benchmark.sh --topology=${model} --mode=performance --batch_size=1 --iters=500"
|
||||
fi
|
||||
|
||||
|
||||
/bin/bash run_model_trigger_common.sh \
|
||||
--yaml=${yaml} \
|
||||
--framework=${FRAMEWORK} \
|
||||
--fwk_ver=${FRAMEWORK_VERSION} \
|
||||
--model=${model} \
|
||||
--model_src_dir=${model_src_dir} \
|
||||
--dataset_location=${dataset_location} \
|
||||
--input_model=${input_model} \
|
||||
--batch_size=${batch_size} \
|
||||
--strategy=${strategy} \
|
||||
--new_benchmark=${new_benchmark} \
|
||||
--tuning_cmd="${tuning_cmd}" \
|
||||
--benchmark_cmd="${benchmark_cmd}" \
|
||||
--inc_new_api="${inc_new_api}" \
|
||||
--mode=${mode} \
|
||||
--USE_TUNE_ACC=${USE_TUNE_ACC} \
|
||||
--PERF_STABLE_CHECK=${PERF_STABLE_CHECK} \
|
||||
--BUILD_BUILDID=${BUILD_BUILDID}
|
@@ -0,0 +1,30 @@
#!/bin/bash
set -eo pipefail
source /neural-compressor/.azure-pipelines/scripts/change_color.sh

# get parameters
PATTERN='[-a-zA-Z0-9_]*='

starttime=`date +'%Y-%m-%d %H:%M:%S'`

for i in "$@"
do
    case $i in
        --tuning_cmd=*)
            tuning_cmd=`echo $i | sed "s/${PATTERN}//"`;;
        --strategy=*)
            strategy=`echo $i | sed "s/${PATTERN}//"`;;
        *)
            echo "Parameter $i not recognized."; exit 1;;
    esac
done

eval "/usr/bin/time -v ${tuning_cmd}"

$BOLD_YELLOW && echo "====== finish tuning. echo information. ======" && $RESET
endtime=`date +'%Y-%m-%d %H:%M:%S'`
start_seconds=$(date --date="$starttime" +%s);
end_seconds=$(date --date="$endtime" +%s);
$BOLD_GREEN && echo "Tuning time spend: "$((end_seconds-start_seconds))"s " && $RESET
$BOLD_GREEN && echo "Tuning strategy: ${strategy}" && $RESET
$BOLD_GREEN && echo "Total system memory (kbytes): $(grep 'MemTotal' /proc/meminfo | sed 's/[^0-9]//g')" && $RESET
@@ -0,0 +1,322 @@
|
||||
import argparse
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
from typing import Optional, Union
|
||||
|
||||
import psutil
|
||||
|
||||
system = platform.system()
|
||||
try:
    import ruamel.yaml as yaml
except ImportError:
    import ruamel_yaml as yaml
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--yaml", type=str, required=True, help="Path to yaml config.")
|
||||
parser.add_argument("--framework", type=str, required=True, help="Framework of model.")
|
||||
parser.add_argument("--dataset_location", type=str, required=True, help="Location of dataset used for model.")
|
||||
parser.add_argument("--strategy", type=str, required=False, help="Strategy to update.")
|
||||
parser.add_argument("--batch_size", type=int, required=False, help="Batch size.")
|
||||
parser.add_argument("--new_benchmark", type=str, required=False, help="Whether to modify benchmark config.")
|
||||
parser.add_argument("--multi_instance", type=str, required=False, help="Whether to eval in multi-instance.")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def update_yaml_dataset(yaml, framework, dataset_location):
|
||||
if not os.path.isfile(yaml):
|
||||
raise Exception(f"Not found yaml config at '{yaml}' location.")
|
||||
|
||||
print("Reading config")
|
||||
with open(yaml, "r") as config:
|
||||
lines = config.readlines()
|
||||
|
||||
# Update dataset
|
||||
if framework != "pytorch":
|
||||
val_txt_location = os.path.dirname(dataset_location) + f"{os.path.sep}" + "val.txt"
|
||||
|
||||
patterns = {
|
||||
"root_path": {
|
||||
"pattern": r"root:.*/path/to/(calibration|evaluation)/dataset/?",
|
||||
"replacement": f"root: {dataset_location}",
|
||||
},
|
||||
"data_path": {
|
||||
"pattern": r"data_path:.*/path/to/(calibration|evaluation)/dataset/?",
|
||||
"replacement": f"data_path: {dataset_location}",
|
||||
},
|
||||
"image_list": {
|
||||
"pattern": r"image_list:.*/path/to/(calibration|evaluation)/label/?",
|
||||
"replacement": f"image_list: {val_txt_location}",
|
||||
},
|
||||
"data_dir": {
|
||||
"pattern": r"data_dir:.*/path/to/dataset/?",
|
||||
"replacement": f"data_dir: {dataset_location}",
|
||||
},
|
||||
}
|
||||
print("======= update_yaml_dataset =======")
|
||||
with open(yaml, "w") as config:
|
||||
for line in lines:
|
||||
for key, key_patterns in patterns.items():
|
||||
if re.search(key_patterns["pattern"], line):
|
||||
print(f"Replacing {key} key.")
|
||||
line = re.sub(key_patterns["pattern"], key_patterns["replacement"], line)
|
||||
config.write(line)
|
||||
|
||||
else:
|
||||
val_dataset = dataset_location + f"{os.path.sep}" + "val"
|
||||
train_dataset = dataset_location + f"{os.path.sep}" + "train"
|
||||
patterns = {
|
||||
"calibration_dataset": {
|
||||
"pattern": r"root:.*/path/to/calibration/dataset/?",
|
||||
"replacement": f"root: {train_dataset}",
|
||||
},
|
||||
"evaluation_dataset": {
|
||||
"pattern": r"root:.*/path/to/evaluation/dataset/?",
|
||||
"replacement": f"root: {val_dataset}",
|
||||
},
|
||||
}
|
||||
|
||||
print("======= update_yaml_dataset =======")
|
||||
with open(yaml, "w") as config:
|
||||
for line in lines:
|
||||
for key, key_patterns in patterns.items():
|
||||
if re.search(key_patterns["pattern"], line):
|
||||
print(f"Replacing {key} key.")
|
||||
line = re.sub(key_patterns["pattern"], key_patterns["replacement"], line)
|
||||
config.write(line)
|
||||
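# Example (hypothetical paths): with --dataset_location=/data/imagenet, a config line such as
#   root: /path/to/evaluation/dataset
# is rewritten in place to
#   root: /data/imagenet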
|
||||
|
||||
def update_yaml_config_tuning(
|
||||
yaml_file,
|
||||
strategy=None,
|
||||
mode=None,
|
||||
batch_size=None,
|
||||
iteration=None,
|
||||
max_trials=None,
|
||||
algorithm=None,
|
||||
timeout=None,
|
||||
strategy_token=None,
|
||||
sampling_size=None,
|
||||
dtype=None,
|
||||
tf_new_api=None,
|
||||
):
|
||||
with open(yaml_file) as f:
|
||||
yaml_config = yaml.round_trip_load(f, preserve_quotes=True)
|
||||
|
||||
if algorithm:
|
||||
try:
|
||||
model_wise = yaml_config.get("quantization", {}).get("model_wise", {})
|
||||
prev_activation = model_wise.get("activation", {})
|
||||
if not prev_activation:
|
||||
model_wise.update({"activation": {}})
|
||||
prev_activation = model_wise.get("activation", {})
|
||||
prev_activation.update({"algorithm": algorithm})
|
||||
except Exception as e:
|
||||
print(f"[ WARNING ] {e}")
|
||||
|
||||
if timeout:
|
||||
try:
|
||||
exit_policy = yaml_config.get("tuning", {}).get("exit_policy", {})
|
||||
prev_timeout = exit_policy.get("timeout", None)
|
||||
exit_policy.update({"timeout": timeout})
|
||||
print(f"Changed {prev_timeout} to {timeout}")
|
||||
except Exception as e:
|
||||
print(f"[ WARNING ] {e}")
|
||||
|
||||
if strategy and strategy != "basic": # Workaround for PyTorch huggingface models (`sed` in run_quant.sh)
|
||||
try:
|
||||
tuning_config = yaml_config.get("tuning", {})
|
||||
prev_strategy = tuning_config.get("strategy", {})
|
||||
if not prev_strategy:
|
||||
tuning_config.update({"strategy": {}})
|
||||
prev_strategy = tuning_config.get("strategy", {})
|
||||
strategy_name = prev_strategy.get("name", None)
|
||||
prev_strategy.update({"name": strategy})
|
||||
if strategy == "sigopt":
|
||||
prev_strategy.update(
|
||||
{
|
||||
"sigopt_api_token": strategy_token,
|
||||
"sigopt_project_id": "lpot",
|
||||
"sigopt_experiment_name": "lpot-tune",
|
||||
}
|
||||
)
|
||||
if strategy == "hawq":
|
||||
prev_strategy.update({"loss": "CrossEntropyLoss"})
|
||||
print(f"Changed {strategy_name} to {strategy}")
|
||||
except Exception as e:
|
||||
print(f"[ WARNING ] {e}")
|
||||
|
||||
if max_trials and max_trials > 0:
|
||||
try:
|
||||
tuning_config = yaml_config.get("tuning", {})
|
||||
prev_exit_policy = tuning_config.get("exit_policy", {})
|
||||
if not prev_exit_policy:
|
||||
tuning_config.update({"exit_policy": {"max_trials": max_trials}})
|
||||
else:
|
||||
prev_max_trials = prev_exit_policy.get("max_trials", None)
|
||||
prev_exit_policy.update({"max_trials": max_trials})
|
||||
print(f"Changed {prev_max_trials} to {max_trials}")
|
||||
except Exception as e:
|
||||
print(f"[ WARNING ] {e}")
|
||||
|
||||
if mode == "accuracy":
|
||||
try:
|
||||
# delete performance part in yaml if exist
|
||||
performance = yaml_config.get("evaluation", {}).get("performance", {})
|
||||
if performance:
|
||||
yaml_config.get("evaluation", {}).pop("performance", {})
|
||||
# accuracy batch_size replace
|
||||
if batch_size:
|
||||
try:
|
||||
dataloader = yaml_config.get("evaluation", {}).get("accuracy", {}).get("dataloader", {})
|
||||
prev_batch_size = dataloader.get("batch_size", None)
|
||||
dataloader.update({"batch_size": batch_size})
|
||||
print(f"Changed accuracy batch size from {prev_batch_size} to {batch_size}")
|
||||
except Exception as e:
|
||||
print(f"[ WARNING ] {e}")
|
||||
except Exception as e:
|
||||
print(f"[ WARNING ] {e}")
|
||||
elif mode:
|
||||
try:
|
||||
# delete accuracy part in yaml if exist
|
||||
accuracy = yaml_config.get("evaluation", {}).get("accuracy", {})
|
||||
if accuracy:
|
||||
yaml_config.get("evaluation", {}).pop("accuracy", {})
|
||||
# performance iteration replace
|
||||
if iteration:
|
||||
try:
|
||||
performance = yaml_config.get("evaluation", {}).get("performance", {})
|
||||
prev_iteration = performance.get("iteration", None)
|
||||
performance.update({"iteration": iteration})
|
||||
print(f"Changed performance batch size from {prev_iteration} to {iteration}")
|
||||
except Exception as e:
|
||||
print(f"[ WARNING ] {e}")
|
||||
|
||||
if batch_size and mode == "latency":
|
||||
try:
|
||||
dataloader = yaml_config.get("evaluation", {}).get("performance", {}).get("dataloader", {})
|
||||
prev_batch_size = dataloader.get("batch_size", None)
|
||||
dataloader.update({"batch_size": batch_size})
|
||||
print(f"Changed accuracy batch size from {prev_batch_size} to {batch_size}")
|
||||
except Exception as e:
|
||||
print(f"[ WARNING ] {e}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"[ WARNING ] {e}")
|
||||
|
||||
if sampling_size:
|
||||
try:
|
||||
calibration = yaml_config.get("quantization", {}).get("calibration", {})
|
||||
prev_sampling_size = calibration.get("sampling_size", None)
|
||||
calibration.update({"sampling_size": sampling_size})
|
||||
print(f"Changed calibration sampling size from {prev_sampling_size} to {sampling_size}")
|
||||
except Exception as e:
|
||||
print(f"[ WARNING ] {e}")
|
||||
|
||||
if dtype:
|
||||
try:
|
||||
quantization = yaml_config.get("quantization", {})
|
||||
prev_dtype = quantization.get("dtype", None)
|
||||
quantization.update({"dtype": dtype})
|
||||
print(f"Changed dtype from {prev_dtype} to {dtype}")
|
||||
except Exception as e:
|
||||
print(f"[ WARNING ] {e}")
|
||||
|
||||
if tf_new_api == "true":
|
||||
try:
|
||||
model = yaml_config.get("model", {})
|
||||
prev_framework = model.get("framework", None)
|
||||
model.update({"framework": "inteltensorflow"})
|
||||
print(f"Changed framework from {prev_framework} to inteltensorflow")
|
||||
except Exception as e:
|
||||
print(f"[ WARNING ] {e}")
|
||||
|
||||
print("====== update_yaml_config_tuning ========")
|
||||
|
||||
yaml_content = yaml.round_trip_dump(yaml_config)
|
||||
|
||||
with open(yaml_file, "w") as output_file:
|
||||
output_file.write(yaml_content)
|
||||
|
||||
|
||||
def update_yaml_config_benchmark_acc(yaml_path: str, batch_size=None):
|
||||
with open(yaml_path) as f:
|
||||
yaml_config = yaml.round_trip_load(f, preserve_quotes=True)
|
||||
try:
|
||||
accuracy = yaml_config.get("evaluation", {}).get("accuracy", {})
|
||||
if not accuracy:
|
||||
raise AttributeError
|
||||
dataloader = accuracy.get("dataloader", {})
|
||||
if dataloader:
|
||||
dataloader.update({"batch_size": batch_size})
|
||||
configs = accuracy.get("configs", {})
|
||||
if configs:
|
||||
del accuracy["configs"]
|
||||
except Exception as e:
|
||||
print(f"[ WARNING ] {e}")
|
||||
|
||||
print("====== update_yaml_config_benchmark_acc ========")
|
||||
|
||||
yaml_content = yaml.round_trip_dump(yaml_config)
|
||||
|
||||
with open(yaml_path, "w") as output_file:
|
||||
output_file.write(yaml_content)
|
||||
|
||||
|
||||
def update_yaml_config_benchmark_perf(yaml_path: str, batch_size=None, multi_instance=None):
|
||||
# Get cpu information for multi-instance
|
||||
total_cores = psutil.cpu_count(logical=False)
|
||||
total_sockets = 1
|
||||
ncores_per_socket = total_cores / total_sockets
|
||||
ncores_per_instance = ncores_per_socket
|
||||
iters = 100
|
||||
|
||||
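    # With multi_instance="true" the benchmark uses 4 cores per instance and 500 iterations;
    # otherwise a single instance spans the whole socket and runs 100 iterations.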
if multi_instance == "true":
|
||||
ncores_per_instance = 4
|
||||
iters = 500
|
||||
|
||||
with open(yaml_path) as f:
|
||||
yaml_config = yaml.round_trip_load(f, preserve_quotes=True)
|
||||
try:
|
||||
performance = yaml_config.get("evaluation", {}).get("performance", {})
|
||||
if not performance:
|
||||
raise AttributeError
|
||||
dataloader = performance.get("dataloader", {})
|
||||
if dataloader:
|
||||
dataloader.update({"batch_size": batch_size})
|
||||
performance.update({"iteration": iters})
|
||||
configs = performance.get("configs", {})
|
||||
if not configs:
|
||||
raise AttributeError
|
||||
else:
|
||||
configs.update(
|
||||
{
|
||||
"cores_per_instance": int(ncores_per_instance),
|
||||
"num_of_instance": int(ncores_per_socket // ncores_per_instance),
|
||||
}
|
||||
)
|
||||
for attr in ["intra_num_of_threads", "inter_num_of_threads", "kmp_blocktime"]:
|
||||
if configs.get(attr):
|
||||
del configs[attr]
|
||||
print(configs)
|
||||
except Exception as e:
|
||||
print(f"[ WARNING ] {e}")
|
||||
|
||||
print("====== update_yaml_config_benchmark_perf ========")
|
||||
|
||||
yaml_content = yaml.round_trip_dump(yaml_config)
|
||||
|
||||
with open(yaml_path, "w") as output_file:
|
||||
output_file.write(yaml_content)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_args()
|
||||
update_yaml_dataset(args.yaml, args.framework, args.dataset_location)
|
||||
update_yaml_config_tuning(args.yaml, strategy=args.strategy)
|
||||
print("===== multi_instance={} ====".format(args.multi_instance))
|
||||
if args.new_benchmark == "true":
|
||||
update_yaml_config_benchmark_acc(args.yaml, batch_size=args.batch_size)
|
||||
update_yaml_config_benchmark_perf(args.yaml, batch_size=args.batch_size, multi_instance=args.multi_instance)
|
@@ -0,0 +1,134 @@
|
||||
source /neural-compressor/.azure-pipelines/scripts/change_color.sh
|
||||
|
||||
set -e
|
||||
pip install coverage
|
||||
export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.${1}
|
||||
coverage_log="/neural-compressor/log_dir/coverage_log"
|
||||
coverage_log_base="/neural-compressor/log_dir/coverage_log_base"
|
||||
coverage_compare="/neural-compressor/log_dir/coverage_compare.html"
|
||||
cd /neural-compressor/log_dir
|
||||
|
||||
$BOLD_YELLOW && echo "collect coverage for PR branch" && $RESET
|
||||
cp ut_3x_coverage/.coverage /neural-compressor/
|
||||
mkdir -p coverage_PR
|
||||
cd /neural-compressor
|
||||
coverage report -m --rcfile=${COVERAGE_RCFILE} | tee ${coverage_log}
|
||||
coverage html -d log_dir/coverage_PR/htmlcov --rcfile=${COVERAGE_RCFILE}
|
||||
coverage xml -o log_dir/coverage_PR/coverage.xml --rcfile=${COVERAGE_RCFILE}
|
||||
ls -l log_dir/coverage_PR/htmlcov
|
||||
|
||||
|
||||
$BOLD_YELLOW && echo "collect coverage for baseline" && $RESET
|
||||
cd /neural-compressor
|
||||
cp -r /neural-compressor/.azure-pipelines .azure-pipelines-pr
|
||||
git config --global --add safe.directory /neural-compressor
|
||||
git fetch
|
||||
git checkout master
|
||||
rm -rf build dist *egg-info
|
||||
binary_index="${1%_fp8}"
|
||||
echo y | pip uninstall neural_compressor_${binary_index}
|
||||
cd /neural-compressor/.azure-pipelines-pr/scripts && bash install_nc.sh ${1}
|
||||
|
||||
coverage erase
|
||||
cd /neural-compressor/log_dir
|
||||
mkdir -p coverage_base
|
||||
rm -rf /neural-compressor/.coverage || true
|
||||
cp ut_3x_baseline_coverage/.coverage /neural-compressor
|
||||
|
||||
cd /neural-compressor
|
||||
coverage report -m --rcfile=${COVERAGE_RCFILE} | tee ${coverage_log_base}
|
||||
coverage html -d log_dir/coverage_base/htmlcov --rcfile=${COVERAGE_RCFILE}
|
||||
coverage xml -o log_dir/coverage_base/coverage.xml --rcfile=${COVERAGE_RCFILE}
|
||||
ls -l log_dir/coverage_base/htmlcov
|
||||
|
||||
get_coverage_data() {
|
||||
# Input argument
|
||||
local coverage_xml="$1"
|
||||
|
||||
# Get coverage data
|
||||
local coverage_data=$(python3 -c "import xml.etree.ElementTree as ET; root = ET.parse('$coverage_xml').getroot(); print(ET.tostring(root).decode())")
|
||||
if [[ -z "$coverage_data" ]]; then
|
||||
echo "Failed to get coverage data from $coverage_xml."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Get lines coverage
|
||||
local lines_covered=$(echo "$coverage_data" | grep -o 'lines-covered="[0-9]*"' | cut -d '"' -f 2)
|
||||
local lines_valid=$(echo "$coverage_data" | grep -o 'lines-valid="[0-9]*"' | cut -d '"' -f 2)
|
||||
if [ $lines_valid == 0 ]; then
|
||||
local lines_coverage=0
|
||||
else
|
||||
local lines_coverage=$(awk "BEGIN {printf \"%.3f\", 100 * $lines_covered / $lines_valid}")
|
||||
fi
|
||||
|
||||
# Get branches coverage
|
||||
local branches_covered=$(echo "$coverage_data" | grep -o 'branches-covered="[0-9]*"' | cut -d '"' -f 2)
|
||||
local branches_valid=$(echo "$coverage_data" | grep -o 'branches-valid="[0-9]*"' | cut -d '"' -f 2)
|
||||
if [ $branches_valid == 0 ]; then
|
||||
local branches_coverage=0
|
||||
else
|
||||
local branches_coverage=$(awk "BEGIN {printf \"%.3f\", 100 * $branches_covered/$branches_valid}")
|
||||
fi
|
||||
|
||||
# Return values
|
||||
echo "$lines_covered $lines_valid $lines_coverage $branches_covered $branches_valid $branches_coverage"
|
||||
}
|
||||
|
||||
$BOLD_YELLOW && echo "compare coverage" && $RESET
|
||||
|
||||
coverage_PR_xml="log_dir/coverage_PR/coverage.xml"
|
||||
coverage_PR_data=$(get_coverage_data $coverage_PR_xml)
|
||||
read lines_PR_covered lines_PR_valid coverage_PR_lines_rate branches_PR_covered branches_PR_valid coverage_PR_branches_rate <<<"$coverage_PR_data"
|
||||
|
||||
coverage_base_xml="log_dir/coverage_base/coverage.xml"
|
||||
coverage_base_data=$(get_coverage_data $coverage_base_xml)
|
||||
read lines_base_covered lines_base_valid coverage_base_lines_rate branches_base_covered branches_base_valid coverage_base_branches_rate <<<"$coverage_base_data"
|
||||
|
||||
$BOLD_BLUE && echo "PR lines coverage: $lines_PR_covered/$lines_PR_valid ($coverage_PR_lines_rate%)" && $RESET
|
||||
$BOLD_BLUE && echo "PR branches coverage: $branches_PR_covered/$branches_PR_valid ($coverage_PR_branches_rate%)" && $RESET
|
||||
$BOLD_BLUE && echo "BASE lines coverage: $lines_base_covered/$lines_base_valid ($coverage_base_lines_rate%)" && $RESET
|
||||
$BOLD_BLUE && echo "BASE branches coverage: $branches_base_covered/$branches_base_valid ($coverage_base_branches_rate%)" && $RESET
|
||||
|
||||
$BOLD_YELLOW && echo "clear upload path" && $RESET
|
||||
rm -fr log_dir/coverage_PR/.coverage*
|
||||
rm -fr log_dir/coverage_base/.coverage*
|
||||
rm -fr log_dir/ut-coverage-*
|
||||
|
||||
# Declare an array to hold failed items
|
||||
declare -a fail_items=()
|
||||
|
||||
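# The PR fails the coverage gate if its line or branch coverage drops more than
# 0.05 percentage points below the master baseline.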
if (( $(bc -l <<< "${coverage_PR_lines_rate}+0.05 < ${coverage_base_lines_rate}") )); then
|
||||
fail_items+=("lines")
|
||||
fi
|
||||
if (( $(bc -l <<< "${coverage_PR_branches_rate}+0.05 < ${coverage_base_branches_rate}") )); then
|
||||
fail_items+=("branches")
|
||||
fi
|
||||
|
||||
if [[ ${#fail_items[@]} -ne 0 ]]; then
|
||||
fail_items_str=$(
|
||||
IFS=', '
|
||||
echo "${fail_items[*]}"
|
||||
)
|
||||
for item in "${fail_items[@]}"; do
|
||||
case "$item" in
|
||||
lines)
|
||||
decrease=$(echo $(printf "%.3f" $(echo "$coverage_PR_lines_rate - $coverage_base_lines_rate" | bc -l)))
|
||||
;;
|
||||
branches)
|
||||
decrease=$(echo $(printf "%.3f" $(echo "$coverage_PR_branches_rate - $coverage_base_branches_rate" | bc -l)))
|
||||
;;
|
||||
*)
|
||||
echo "Unknown item: $item"
|
||||
continue
|
||||
;;
|
||||
esac
|
||||
$BOLD_RED && echo "Unit Test failed with ${item} coverage decrease ${decrease}%" && $RESET
|
||||
done
|
||||
$BOLD_RED && echo "compare coverage to give detail info" && $RESET
|
||||
bash /neural-compressor/.azure-pipelines-pr/scripts/ut/compare_coverage.sh ${coverage_compare} ${coverage_log} ${coverage_log_base} "FAILED" ${coverage_PR_lines_rate} ${coverage_base_lines_rate} ${coverage_PR_branches_rate} ${coverage_base_branches_rate}
|
||||
exit 1
|
||||
else
|
||||
$BOLD_GREEN && echo "Unit Test success with coverage lines: ${coverage_PR_lines_rate}%, branches: ${coverage_PR_branches_rate}%" && $RESET
|
||||
$BOLD_GREEN && echo "compare coverage to give detail info" && $RESET
|
||||
bash /neural-compressor/.azure-pipelines-pr/scripts/ut/compare_coverage.sh ${coverage_compare} ${coverage_log} ${coverage_log_base} "SUCCESS" ${coverage_PR_lines_rate} ${coverage_base_lines_rate} ${coverage_PR_branches_rate} ${coverage_base_branches_rate}
|
||||
fi
|
@@ -0,0 +1,19 @@
|
||||
[run]
|
||||
branch = True
|
||||
|
||||
[report]
|
||||
include =
|
||||
*/neural_compressor/common/*
|
||||
*/neural_compressor/torch/*
|
||||
omit =
|
||||
*/neural_compressor/torch/algorithms/fp8_quant/*
|
||||
*/neural_compressor/torch/algorithms/mixed_low_precision/*
|
||||
*/neural_compressor/torch/amp/*
|
||||
exclude_lines =
|
||||
pragma: no cover
|
||||
raise NotImplementedError
|
||||
raise TypeError
|
||||
if self.device == "gpu":
|
||||
if device == "gpu":
|
||||
except ImportError:
|
||||
except Exception as e:
|
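# Note (illustrative): each entry under exclude_lines is a regex matched against
# source lines; any statement matching one of them (e.g. a bare
# "raise NotImplementedError") is left out of the line and branch totals
# reported for the included packages above.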
@@ -0,0 +1,15 @@
|
||||
[run]
|
||||
branch = True
|
||||
|
||||
[report]
|
||||
include =
|
||||
*/neural_compressor/torch/algorithms/fp8_quant/*
|
||||
*/neural_compressor/torch/algorithms/mixed_low_precision/*
|
||||
exclude_lines =
|
||||
pragma: no cover
|
||||
raise NotImplementedError
|
||||
raise TypeError
|
||||
if self.device == "gpu":
|
||||
if device == "gpu":
|
||||
except ImportError:
|
||||
except Exception as e:
|
@@ -0,0 +1,15 @@
|
||||
[run]
|
||||
branch = True
|
||||
|
||||
[report]
|
||||
include =
|
||||
*/neural_compressor/common/*
|
||||
*/neural_compressor/tensorflow/*
|
||||
exclude_lines =
|
||||
pragma: no cover
|
||||
raise NotImplementedError
|
||||
raise TypeError
|
||||
if self.device == "gpu":
|
||||
if device == "gpu":
|
||||
except ImportError:
|
||||
except Exception as e:
|
@@ -0,0 +1,47 @@
|
||||
#!/bin/bash
|
||||
python -c "import neural_compressor as nc"
|
||||
test_case="run 3x Torch"
|
||||
echo "${test_case}"
|
||||
|
||||
echo "##[section]Run import check"
|
||||
set -e
|
||||
python -c "import neural_compressor.torch"
|
||||
python -c "import neural_compressor.common"
|
||||
echo "##[section]import check pass"
|
||||
|
||||
# install requirements
|
||||
echo "##[group]set up UT env..."
|
||||
export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
|
||||
pip install -r /neural-compressor/test/3x/torch/requirements.txt
|
||||
pip install pytest-cov
|
||||
pip install pytest-html
|
||||
echo "##[endgroup]"
|
||||
pip list
|
||||
|
||||
export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.3x_pt
|
||||
inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
|
||||
cd /neural-compressor/test/3x || exit 1
|
||||
rm -rf tensorflow
|
||||
rm -rf torch/algorithms/fp8_quant
|
||||
rm -rf torch/quantization/fp8_quant
|
||||
|
||||
LOG_DIR=/neural-compressor/log_dir
|
||||
mkdir -p ${LOG_DIR}
|
||||
ut_log_name=${LOG_DIR}/ut_3x_pt.log
|
||||
|
||||
find . -name "test*.py" | sed "s,\.\/,python -m pytest --cov=\"${inc_path}\" --cov-report term --html=report.html --self-contained-html --cov-report xml:coverage.xml --cov-append -vs --disable-warnings ,g" > run.sh
|
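# Illustrative line the sed rewrite above would place in run.sh (the test file
# name is hypothetical; ${inc_path} is expanded at generation time):
#   python -m pytest --cov="<expanded inc_path>" --cov-report term --html=report.html \
#     --self-contained-html --cov-report xml:coverage.xml --cov-append -vs \
#     --disable-warnings torch/quantization/test_example.py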
||||
cat run.sh
|
||||
bash run.sh 2>&1 | tee ${ut_log_name}
|
||||
|
||||
cp report.html ${LOG_DIR}/
|
||||
|
||||
if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then
|
||||
echo "Find errors in pytest case, please check the output..."
|
||||
echo "Please search for '== FAILURES ==' or '== ERRORS =='"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# if UT passes, collect the coverage file into artifacts
|
||||
cp .coverage ${LOG_DIR}/.coverage
|
||||
|
||||
echo "UT finished successfully! "
|
@@ -0,0 +1,63 @@
|
||||
#!/bin/bash
|
||||
python -c "import neural_compressor as nc"
|
||||
test_case="run 3x Torch Habana FP8"
|
||||
echo "${test_case}"
|
||||
|
||||
echo "##[section]Run import check"
|
||||
set -e
|
||||
python -c "import neural_compressor.torch"
|
||||
python -c "import neural_compressor.common"
|
||||
echo "##[section]import check pass"
|
||||
|
||||
# install requirements
|
||||
echo "##[group]set up UT env..."
|
||||
export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
|
||||
sed -i '/^intel_extension_for_pytorch/d' /neural-compressor/test/3x/torch/requirements.txt
|
||||
sed -i '/^auto_round/d' /neural-compressor/test/3x/torch/requirements.txt
|
||||
cat /neural-compressor/test/3x/torch/requirements.txt
|
||||
pip install -r /neural-compressor/test/3x/torch/requirements.txt
|
||||
pip install pytest-cov
|
||||
pip install pytest-html
|
||||
pip install pytest-html-merger
|
||||
echo "##[endgroup]"
|
||||
pip list
|
||||
|
||||
export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8
|
||||
inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
|
||||
cd /neural-compressor/test/3x || exit 1
|
||||
|
||||
LOG_DIR=/neural-compressor/log_dir
|
||||
mkdir -p ${LOG_DIR}
|
||||
ut_log_name=${LOG_DIR}/ut_3x_pt_fp8.log
|
||||
pytest --cov="${inc_path}" -vs --disable-warnings --html=report_1.html --self-contained-html torch/quantization/weight_only/test_load.py 2>&1 | tee -a ${ut_log_name}
|
||||
pytest --cov="${inc_path}" -vs --disable-warnings --html=report_2.html --self-contained-html torch/quantization/weight_only/test_rtn.py 2>&1 | tee -a ${ut_log_name}
|
||||
# pytest --cov="${inc_path}" -vs --disable-warnings --html=report_3.html --self-contained-html torch/quantization/weight_only/test_autoround.py 2>&1 | tee -a ${ut_log_name}
|
||||
|
||||
# The folder below contains special pytest configuration, so enter it and run its tests separately
|
||||
cd /neural-compressor/test/3x/torch/algorithms/fp8_quant
|
||||
pytest --cov="${inc_path}" -vs --disable-warnings --html=report_4.html --self-contained-html . 2>&1 | tee -a ${ut_log_name}
|
||||
cp .coverage ${LOG_DIR}/.coverage.algo_fp8
|
||||
cd - && mv /neural-compressor/test/3x/torch/algorithms/fp8_quant/*.html .
|
||||
|
||||
# The folder below contains special pytest configuration, so enter it and run its tests separately
|
||||
cd /neural-compressor/test/3x/torch/quantization/fp8_quant
|
||||
pytest --cov="${inc_path}" -vs --disable-warnings --html=report_5.html --self-contained-html . 2>&1 | tee -a ${ut_log_name}
|
||||
cp .coverage ${LOG_DIR}/.coverage.quant_fp8
|
||||
cd - && mv /neural-compressor/test/3x/torch/quantization/fp8_quant/*.html .
|
||||
|
||||
mkdir -p report && mv *.html report
|
||||
pytest_html_merger -i ./report -o ./report.html
|
||||
cp report.html ${LOG_DIR}/
|
||||
|
||||
if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then
|
||||
echo "Find errors in pytest case, please check the output..."
|
||||
echo "Please search for '== FAILURES ==' or '== ERRORS =='"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# if UT passes, collect the coverage file into artifacts
|
||||
cp .coverage ${LOG_DIR}/.coverage
|
||||
cd ${LOG_DIR}
|
||||
coverage combine .coverage.*
|
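# Illustrative: "coverage combine .coverage.*" merges the per-suite data files
# copied above (.coverage.algo_fp8, .coverage.quant_fp8) into a single
# ${LOG_DIR}/.coverage data file that the later compare stage reads.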
||||
|
||||
echo "UT finished successfully! "
|
@@ -0,0 +1,76 @@
|
||||
#!/bin/bash
|
||||
python -c "import neural_compressor as nc"
|
||||
test_case="run 3x TensorFlow"
|
||||
echo "${test_case}"
|
||||
|
||||
echo "##[section]Run import check"
|
||||
set -e
|
||||
python -c "import neural_compressor.tensorflow"
|
||||
python -c "import neural_compressor.common"
|
||||
echo "##[section]import check pass"
|
||||
|
||||
# install requirements
|
||||
echo "##[group]set up UT env..."
|
||||
pip install -r /neural-compressor/test/3x/tensorflow/requirements.txt
|
||||
pip install pytest-cov
|
||||
pip install pytest-html
|
||||
pip install pytest-html-merger
|
||||
echo "##[endgroup]"
|
||||
pip list
|
||||
|
||||
export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.3x_tf
|
||||
inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
|
||||
cd /neural-compressor/test/3x || exit 1
|
||||
rm -rf torch
|
||||
rm -rf onnxrt
|
||||
mv tensorflow/keras ../3x_keras
|
||||
mv tensorflow/quantization/ptq/newapi ../3x_newapi
|
||||
|
||||
LOG_DIR=/neural-compressor/log_dir
|
||||
mkdir -p ${LOG_DIR}
|
||||
ut_log_name=${LOG_DIR}/ut_3x_tf.log
|
||||
|
||||
# test for tensorflow ut
|
||||
pytest --cov="${inc_path}" -vs --disable-warnings --html=report_tf_quant.html --self-contained-html ./tensorflow/quantization 2>&1 | tee -a ${ut_log_name}
|
||||
rm -rf tensorflow/quantization
|
||||
pytest --cov="${inc_path}" --cov-append -vs --disable-warnings --html=report_tf_test_quantize_model.html --self-contained-html ./tensorflow/test_quantize_model.py 2>&1 | tee -a ${ut_log_name}
|
||||
rm -rf tensorflow/test_quantize_model.py
|
||||
pytest --cov="${inc_path}" --cov-append -vs --disable-warnings --html=report_tf.html --self-contained-html . 2>&1 | tee -a ${ut_log_name}
|
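# Illustrative: --cov-append keeps accumulating into the same coverage data
# file instead of starting fresh, so the three invocations above behave
# roughly like:
#   coverage run -m pytest ./tensorflow/quantization
#   coverage run --append -m pytest ./tensorflow/test_quantize_model.py
#   coverage run --append -m pytest .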
||||
|
||||
# test for tensorflow new api ut
|
||||
pip uninstall tensorflow -y
|
||||
pip install /tf_dataset/tf_binary/230928/tensorflow*.whl
|
||||
pip install cmake
|
||||
pip install protobuf==3.20.3
|
||||
pip install horovod==0.27.0
|
||||
pip list
|
||||
rm -rf tensorflow/*
|
||||
mkdir -p tensorflow/quantization/ptq
|
||||
mv ../3x_newapi tensorflow/quantization/ptq/newapi
|
||||
find . -name "test*.py" | sed "s,\.\/,python -m pytest --cov=${inc_path} --cov-append -vs --disable-warnings ,g" > run.sh
|
||||
cat run.sh
|
||||
bash run.sh 2>&1 | tee -a ${ut_log_name}
|
||||
|
||||
# test for itex ut
|
||||
rm -rf tensorflow/*
|
||||
mv ../3x_keras tensorflow/keras
|
||||
pip uninstall tensorflow -y
|
||||
pip install intel-extension-for-tensorflow[cpu]
|
||||
pytest --cov="${inc_path}" --cov-append -vs --disable-warnings --html=report_keras.html --self-contained-html ./tensorflow 2>&1 | tee -a ${ut_log_name}
|
||||
|
||||
mkdir -p report
|
||||
mv *.html report
|
||||
pytest_html_merger -i ./report -o ./report.html
|
||||
|
||||
cp report.html ${LOG_DIR}/
|
||||
|
||||
if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then
|
||||
echo "Find errors in pytest case, please check the output..."
|
||||
echo "Please search for '== FAILURES ==' or '== ERRORS =='"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# if UT passes, collect the coverage file into artifacts
|
||||
cp .coverage ${LOG_DIR}/.coverage
|
||||
|
||||
echo "UT finished successfully! "
|
@@ -0,0 +1,139 @@
|
||||
source /neural-compressor/.azure-pipelines/scripts/change_color.sh
|
||||
|
||||
pip install coverage
|
||||
export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/coverage.file
|
||||
coverage_log="/neural-compressor/log_dir/coverage_log"
|
||||
coverage_log_base="/neural-compressor/log_dir/coverage_log_base"
|
||||
coverage_compare="/neural-compressor/log_dir/coverage_compare.html"
|
||||
cd /neural-compressor/log_dir
|
||||
|
||||
$BOLD_YELLOW && echo "##[group]collect coverage for PR branch" && $RESET
|
||||
mkdir -p coverage_PR
|
||||
cp ut_*_coverage/.coverage.* ./coverage_PR/
|
||||
|
||||
cd coverage_PR
|
||||
coverage combine --keep --rcfile=${COVERAGE_RCFILE}
|
||||
cp .coverage /neural-compressor/.coverage
|
||||
cd /neural-compressor
|
||||
coverage report -m --rcfile=${COVERAGE_RCFILE} | tee ${coverage_log}
|
||||
coverage html -d log_dir/coverage_PR/htmlcov --rcfile=${COVERAGE_RCFILE}
|
||||
coverage xml -o log_dir/coverage_PR/coverage.xml --rcfile=${COVERAGE_RCFILE}
|
||||
ls -l log_dir/coverage_PR/htmlcov
|
||||
|
||||
cd /neural-compressor
|
||||
cp -r /neural-compressor/.azure-pipelines .azure-pipelines-pr
|
||||
git config --global --add safe.directory /neural-compressor
|
||||
git fetch
|
||||
git checkout master
|
||||
rm -rf build dist *egg-info
|
||||
echo y | pip uninstall neural-compressor
|
||||
cd /neural-compressor/.azure-pipelines-pr/scripts && bash install_nc.sh
|
||||
echo "##[endgroup]"
|
||||
|
||||
$BOLD_YELLOW && echo "##[group]collect coverage for baseline" && $RESET
|
||||
coverage erase
|
||||
cd /neural-compressor/log_dir
|
||||
mkdir -p coverage_base
|
||||
cp ut-base_*_coverage/.coverage.* ./coverage_base/
|
||||
|
||||
cd coverage_base
|
||||
coverage combine --keep --rcfile=${COVERAGE_RCFILE}
|
||||
cp .coverage /neural-compressor/.coverage
|
||||
cd /neural-compressor
|
||||
coverage report -m --rcfile=${COVERAGE_RCFILE} | tee ${coverage_log_base}
|
||||
coverage html -d log_dir/coverage_base/htmlcov --rcfile=${COVERAGE_RCFILE}
|
||||
coverage xml -o log_dir/coverage_base/coverage.xml --rcfile=${COVERAGE_RCFILE}
|
||||
ls -l log_dir/coverage_base/htmlcov
|
||||
echo "##[endgroup]"
|
||||
|
||||
get_coverage_data() {
|
||||
# Input argument
|
||||
local coverage_xml="$1"
|
||||
|
||||
# Get coverage data
|
||||
local coverage_data=$(python3 -c "import xml.etree.ElementTree as ET; root = ET.parse('$coverage_xml').getroot(); print(ET.tostring(root).decode())")
|
||||
if [[ -z "$coverage_data" ]]; then
|
||||
echo "Failed to get coverage data from $coverage_xml."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Get lines coverage
|
||||
local lines_covered=$(echo "$coverage_data" | grep -o 'lines-covered="[0-9]*"' | cut -d '"' -f 2)
|
||||
local lines_valid=$(echo "$coverage_data" | grep -o 'lines-valid="[0-9]*"' | cut -d '"' -f 2)
|
||||
if [ "$lines_valid" -eq 0 ]; then
|
||||
local lines_coverage=0
|
||||
else
|
||||
local lines_coverage=$(awk "BEGIN {printf \"%.3f\", 100 * $lines_covered / $lines_valid}")
|
||||
fi
|
||||
|
||||
# Get branches coverage
|
||||
local branches_covered=$(echo "$coverage_data" | grep -o 'branches-covered="[0-9]*"' | cut -d '"' -f 2)
|
||||
local branches_valid=$(echo "$coverage_data" | grep -o 'branches-valid="[0-9]*"' | cut -d '"' -f 2)
|
||||
if [ "$branches_valid" -eq 0 ]; then
|
||||
local branches_coverage=0
|
||||
else
|
||||
local branches_coverage=$(awk "BEGIN {printf \"%.3f\", 100 * $branches_covered/$branches_valid}")
|
||||
fi
|
||||
|
||||
# Return values
|
||||
echo "$lines_covered $lines_valid $lines_coverage $branches_covered $branches_valid $branches_coverage"
|
||||
}
|
||||
|
||||
$BOLD_YELLOW && echo "compare coverage" && $RESET
|
||||
|
||||
coverage_PR_xml="log_dir/coverage_PR/coverage.xml"
|
||||
coverage_PR_data=$(get_coverage_data $coverage_PR_xml)
|
||||
read lines_PR_covered lines_PR_valid coverage_PR_lines_rate branches_PR_covered branches_PR_valid coverage_PR_branches_rate <<<"$coverage_PR_data"
|
||||
|
||||
coverage_base_xml="log_dir/coverage_base/coverage.xml"
|
||||
coverage_base_data=$(get_coverage_data $coverage_base_xml)
|
||||
read lines_base_covered lines_base_valid coverage_base_lines_rate branches_base_covered branches_base_valid coverage_base_branches_rate <<<"$coverage_base_data"
|
||||
|
||||
$BOLD_BLUE && echo "PR lines coverage: $lines_PR_covered/$lines_PR_valid ($coverage_PR_lines_rate%)" && $RESET
|
||||
$BOLD_BLUE && echo "PR branches coverage: $branches_PR_covered/$branches_PR_valid ($coverage_PR_branches_rate%)" && $RESET
|
||||
$BOLD_BLUE && echo "BASE lines coverage: $lines_base_covered/$lines_base_valid ($coverage_base_lines_rate%)" && $RESET
|
||||
$BOLD_BLUE && echo "BASE branches coverage: $branches_base_covered/$branches_base_valid ($coverage_base_branches_rate%)" && $RESET
|
||||
|
||||
$BOLD_YELLOW && echo "clear upload path" && $RESET
|
||||
rm -fr log_dir/coverage_PR/.coverage*
|
||||
rm -fr log_dir/coverage_base/.coverage*
|
||||
rm -fr log_dir/ut-coverage-*
|
||||
|
||||
# Declare an array to hold failed items
|
||||
declare -a fail_items=()
|
||||
|
||||
if (( $(bc -l <<< "${coverage_PR_lines_rate}+0.05 < ${coverage_base_lines_rate}") )); then
|
||||
fail_items+=("lines")
|
||||
fi
|
||||
if (( $(bc -l <<< "${coverage_PR_branches_rate}+0.05 < ${coverage_base_branches_rate}") )); then
|
||||
fail_items+=("branches")
|
||||
fi
|
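# Sketch of the tolerance checks above: bc prints 1 when the expression is
# true and 0 otherwise, so the arithmetic (( )) marks a failure only when the
# drop exceeds the 0.05% tolerance, e.g.
#   bc -l <<< "89.90+0.05 < 90.00"   # -> 1, drop larger than 0.05% => fail
#   bc -l <<< "89.99+0.05 < 90.00"   # -> 0, within tolerance        => pass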
||||
|
||||
if [[ ${#fail_items[@]} -ne 0 ]]; then
|
||||
fail_items_str=$(
|
||||
IFS=', '
|
||||
echo "${fail_items[*]}"
|
||||
)
|
||||
for item in "${fail_items[@]}"; do
|
||||
case "$item" in
|
||||
lines)
|
||||
decrease=$(echo $(printf "%.3f" $(echo "$coverage_PR_lines_rate - $coverage_base_lines_rate" | bc -l)))
|
||||
;;
|
||||
branches)
|
||||
decrease=$(echo $(printf "%.3f" $(echo "$coverage_PR_branches_rate - $coverage_base_branches_rate" | bc -l)))
|
||||
;;
|
||||
*)
|
||||
echo "Unknown item: $item"
|
||||
continue
|
||||
;;
|
||||
esac
|
||||
$BOLD_RED && echo "Unit Test failed with ${item} coverage decrease ${decrease}%" && $RESET
|
||||
done
|
||||
$BOLD_RED && echo "compare coverage to give detail info" && $RESET
|
||||
bash /neural-compressor/.azure-pipelines-pr/scripts/ut/compare_coverage.sh ${coverage_compare} ${coverage_log} ${coverage_log_base} "FAILED" ${coverage_PR_lines_rate} ${coverage_base_lines_rate} ${coverage_PR_branches_rate} ${coverage_base_branches_rate}
|
||||
exit 1
|
||||
else
|
||||
$BOLD_GREEN && echo "Unit Test success with coverage lines: ${coverage_PR_lines_rate}%, branches: ${coverage_PR_branches_rate}%" && $RESET
|
||||
$BOLD_GREEN && echo "compare coverage to give detail info" && $RESET
|
||||
bash /neural-compressor/.azure-pipelines-pr/scripts/ut/compare_coverage.sh ${coverage_compare} ${coverage_log} ${coverage_log_base} "SUCCESS" ${coverage_PR_lines_rate} ${coverage_base_lines_rate} ${coverage_PR_branches_rate} ${coverage_base_branches_rate}
|
||||
fi
|
@@ -0,0 +1,225 @@
|
||||
output_file=$1
|
||||
coverage_pr_log=$2
|
||||
coverage_base_log=$3
|
||||
coverage_status=$4
|
||||
coverage_PR_lines_rate=$5
|
||||
coverage_base_lines_rate=$6
|
||||
coverage_PR_branches_rate=$7
|
||||
coverage_base_branches_rate=$8
|
||||
module_name="neural_compressor"
|
||||
[[ ! -f $coverage_pr_log ]] && exit 1
|
||||
[[ ! -f $coverage_base_log ]] && exit 1
|
||||
file_name="./coverage_compare"
|
||||
sed -i "s|\/usr.*${module_name}\/||g" $coverage_pr_log
|
||||
sed -i "s|\/usr.*${module_name}\/||g" $coverage_base_log
|
||||
diff $coverage_pr_log $coverage_base_log >diff_file
|
||||
[[ $? == 0 ]] && exit 0
|
||||
grep -Po "[<,>,\d].*" diff_file | awk '{print $1 "\t" $2 "\t" $3 "\t" $4 "\t" $5 "\t" $6 "\t" $7}' | sed "/Name/d" | sed "/TOTAL/d" | sed "/---/d" >$file_name
|
||||
[[ ! -s $file_name ]] && exit 0
|
||||
[[ -f $output_file ]] && rm -f $output_file
|
||||
touch $output_file
|
||||
|
||||
function generate_html_head {
|
||||
|
||||
cat >${output_file} <<eof
|
||||
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>UT coverage</title>
|
||||
<style type="text/css">
|
||||
body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
background: white no-repeat left top;
|
||||
}
|
||||
|
||||
.main {
|
||||
margin: 20px auto 10px auto;
|
||||
background: white;
|
||||
border-radius: 8px;
|
||||
-moz-border-radius: 8px;
|
||||
-webkit-border-radius: 8px;
|
||||
padding: 0 30px 30px 30px;
|
||||
border: 1px solid #adaa9f;
|
||||
box-shadow: 0 2px 2px #9c9c9c;
|
||||
-moz-box-shadow: 0 2px 2px #9c9c9c;
|
||||
-webkit-box-shadow: 0 2px 2px #9c9c9c;
|
||||
}
|
||||
|
||||
.features-table {
|
||||
width: 100%;
|
||||
margin: 0 auto;
|
||||
border-collapse: separate;
|
||||
border-spacing: 0;
|
||||
text-shadow: 0 1px 0 #fff;
|
||||
color: #2a2a2a;
|
||||
background: #fafafa;
|
||||
background-image: -moz-linear-gradient(top, #fff, #eaeaea, #fff);
|
||||
/* Firefox 3.6 */
|
||||
background-image: -webkit-gradient(linear, center bottom, center top, from(#fff), color-stop(0.5, #eaeaea), to(#fff));
|
||||
font-family: Verdana, Arial, Helvetica
|
||||
}
|
||||
|
||||
.features-table th,
|
||||
td {
|
||||
text-align: center;
|
||||
height: 25px;
|
||||
line-height: 25px;
|
||||
padding: 0 8px;
|
||||
border: 1px solid #cdcdcd;
|
||||
box-shadow: 0 1px 0 white;
|
||||
-moz-box-shadow: 0 1px 0 white;
|
||||
-webkit-box-shadow: 0 1px 0 white;
|
||||
white-space: nowrap;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
eof
|
||||
}
|
||||
|
||||
function extract_diff_data() {
|
||||
local file_name=$1 diff_file=$2 reg=$3
|
||||
local file=$(cat $file_name | grep "${diff_file}" | grep -v ".*/${diff_file}" | grep -Po "${reg}.*" | sed "s/${reg}[ \t]*//g" | awk '{print $1}')
|
||||
local stmts=$(cat $file_name | grep "${diff_file}" | grep -v ".*/${diff_file}" | grep -Po "${reg}.*" | sed "s/${reg}[ \t]*//g" | awk '{print $2}')
|
||||
local miss=$(cat $file_name | grep "${diff_file}" | grep -v ".*/${diff_file}" | grep -Po "${reg}.*" | sed "s/${reg}[ \t]*//g" | awk '{print $3}')
|
||||
local cover=$(cat $file_name | grep "${diff_file}" | grep -v ".*/${diff_file}" | grep -Po "${reg}.*" | sed "s/${reg}[ \t]*//g" | awk '{print $6}')
|
||||
local branch=$(cat $file_name | grep "${diff_file}" | grep -v ".*/${diff_file}" | grep -Po "${reg}.*" | sed "s/${reg}[ \t]*//g" | awk '{print $4}')
|
||||
|
||||
echo "$file $stmts $miss $cover $branch"
|
||||
}
|
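# Illustrative input handled above: a "coverage report -m" row prefixed by the
# diff marker passed in as ${reg} (numbers are hypothetical):
#   > neural_compressor/utils/utility.py   412   37   118   12   89%
# after stripping the marker, the awk fields map to
#   $1=Name  $2=Stmts  $3=Miss  $4=Branch  $6=Cover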
||||
|
||||
function write_compare_details() {
|
||||
local file=$1 stmts1=$2 miss1=$3 branch1=$4 cover1=$5 stmts2=$6 miss2=$7 branch2=$8 cover2=$9
|
||||
echo """
|
||||
<tr>
|
||||
<td>PR | BASE</td>
|
||||
<td style=\"text-align:left\">${file}</td>
|
||||
<td style=\"text-align:left\">${stmts1} | ${stmts2}</td>
|
||||
<td style=\"text-align:left\">${miss1} | ${miss2}</td>
|
||||
<td style=\"text-align:left\">${branch1} | ${branch2}</td>
|
||||
<td style=\"text-align:left\">${cover1} | ${cover2}</td>
|
||||
</tr>
|
||||
""" >>${output_file}
|
||||
}
|
||||
|
||||
function get_color() {
|
||||
local decrease=$1
|
||||
if (($(echo "$decrease < 0" | bc -l))); then
|
||||
local color="#FFD2D2"
|
||||
else
|
||||
local color="#90EE90"
|
||||
fi
|
||||
echo "$color"
|
||||
}
|
||||
|
||||
function generate_coverage_summary() {
|
||||
# generate table head
|
||||
local Lines_cover_decrease=$(printf "%.3f" "$(echo "$coverage_PR_lines_rate - $coverage_base_lines_rate" | bc -l)")
|
||||
local Branches_cover_decrease=$(printf "%.3f" "$(echo "$coverage_PR_branches_rate - $coverage_base_branches_rate" | bc -l)")
|
||||
|
||||
read lines_coverage_color <<<"$(get_color ${Lines_cover_decrease})"
|
||||
read branches_coverage_color <<<"$(get_color ${Branches_cover_decrease})"
|
||||
|
||||
echo """
|
||||
<body>
|
||||
<div class="main">
|
||||
<h1 align="center">Coverage Summary : ${coverage_status}</h1>
|
||||
<table class=\"features-table\" style=\"width: 60%;margin-left:auto;margin-right:auto;empty-cells: hide\">
|
||||
<tr>
|
||||
<th></th>
|
||||
<th>Base coverage</th>
|
||||
<th>PR coverage</th>
|
||||
<th>Diff</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> Lines </td>
|
||||
<td> ${coverage_base_lines_rate}% </td>
|
||||
<td> ${coverage_PR_lines_rate}% </td>
|
||||
<td style=\"background-color:${lines_coverage_color}\"> ${Lines_cover_decrease}% </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td> Branches </td>
|
||||
<td> ${coverage_base_branches_rate}% </td>
|
||||
<td> ${coverage_PR_branches_rate}% </td>
|
||||
<td style=\"background-color:${branches_coverage_color}\"> ${Branches_cover_decrease}% </td>
|
||||
</tr>
|
||||
</table>
|
||||
</div>
|
||||
""" >>${output_file}
|
||||
}
|
||||
|
||||
function generate_coverage_details() {
|
||||
echo """
|
||||
<div class="main">
|
||||
<h2 align="center">Coverage Detail</h2>
|
||||
<table class=\"features-table\" style=\"width: 60%;margin-left:auto;margin-right:auto;empty-cells: hide\">
|
||||
<tr>
|
||||
<th>Commit</th>
|
||||
<th>FileName</th>
|
||||
<th>Stmts</th>
|
||||
<th>Miss</th>
|
||||
<th>Branch</th>
|
||||
<th>Cover</th>
|
||||
</tr>
|
||||
""" >>${output_file}
|
||||
# generate compare detail
|
||||
cat ${file_name} | while read line; do
|
||||
if [[ $(echo $line | grep "[0-9]a[0-9]") ]] && [[ $(grep -A 1 "$line" ${file_name} | grep ">") ]]; then
|
||||
diff_lines=$(sed -n "/${line}/,/^[0-9]/p" ${file_name} | grep ">")
|
||||
diff_file_name=$(sed -n "/${line}/,/^[0-9]/p" ${file_name} | grep -Po ">.*[a-z,A-Z].*.py" | sed "s|>||g")
|
||||
for diff_file in ${diff_file_name}; do
|
||||
diff_file=$(echo "${diff_file}" | sed 's/[ \t]*//g')
|
||||
diff_coverage_data=$(extract_diff_data ${file_name} ${diff_file} ">")
|
||||
read file stmts miss cover branch <<<"$diff_coverage_data"
|
||||
write_compare_details $file "NA" "NA" "NA" "NA" $stmts $miss $branch $cover
|
||||
done
|
||||
elif [[ $(echo $line | grep "[0-9]c[0-9]") ]] && [[ $(cat ${file_name} | grep -A 1 "$line" | grep "<") ]]; then
|
||||
diff_lines=$(sed -n "/${line}/,/^[0-9]/p" ${file_name} | grep "<")
|
||||
diff_file_name=$(sed -n "/${line}/,/^[0-9]/p" ${file_name} | grep -Po "<.*[a-z,A-Z].*.py" | sed "s|<||g")
|
||||
for diff_file in ${diff_file_name}; do
|
||||
diff_file=$(echo "${diff_file}" | sed 's/[ \t]*//g')
|
||||
diff_coverage_data1=$(extract_diff_data ${file_name} ${diff_file} "<")
|
||||
read file1 stmts1 miss1 cover1 branch1 <<<"$diff_coverage_data1"
|
||||
diff_coverage_data2=$(extract_diff_data ${file_name} ${diff_file} ">")
|
||||
read file2 stmts2 miss2 cover2 branch2 <<<"$diff_coverage_data2"
|
||||
write_compare_details $file1 $stmts1 $miss1 $branch1 $cover1 $stmts2 $miss2 $branch2 $cover2
|
||||
done
|
||||
elif [[ $(echo $line | grep "[0-9]d[0-9]") ]] && [[ $(cat ${file_name} | grep -A 1 "$line" | grep "<") ]]; then
|
||||
diff_lines=$(sed -n "/${line}/,/^[0-9]/p" ${file_name} | grep "<")
|
||||
diff_file_name=$(sed -n "/${line}/,/^[0-9]/p" ${file_name} | grep -Po "<.*[a-z,A-Z].*.py" | sed "s|<||g")
|
||||
for diff_file in ${diff_file_name}; do
|
||||
diff_file=$(echo "${diff_file}" | sed 's/[ \t]*//g')
|
||||
diff_coverage_data=$(extract_diff_data ${file_name} ${diff_file} "<")
|
||||
read file stmts miss cover branch <<<"$diff_coverage_data"
|
||||
write_compare_details $file $stmts $miss $branch $cover "NA" "NA" "NA" "NA"
|
||||
done
|
||||
fi
|
||||
done
|
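# Sketch of the classic diff hunk markers matched above (the PR log is diff's
# first file, the base log the second):
#   "12a13" -> rows only in the base log  (">" lines, PR side reported as NA)
#   "20c21" -> rows changed in both logs  ("<" and ">" lines compared side by side)
#   "30d29" -> rows only in the PR log    ("<" lines, base side reported as NA)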
||||
# generate table end
|
||||
echo """
|
||||
</table>
|
||||
</div>
|
||||
</body>
|
||||
|
||||
</html>""" >>${output_file}
|
||||
}
|
||||
|
||||
function main {
|
||||
generate_html_head
|
||||
generate_coverage_summary
|
||||
|
||||
if [[ ${coverage_status} = "SUCCESS" ]]; then
|
||||
echo """</body></html>""" >>${output_file}
|
||||
echo "coverage PASS, no need to compare difference"
|
||||
exit 0
|
||||
else
|
||||
generate_coverage_details
|
||||
fi
|
||||
}
|
||||
|
||||
main
|
@@ -0,0 +1,30 @@
|
||||
[run]
|
||||
branch = True
|
||||
|
||||
[report]
|
||||
omit =
|
||||
*/**/fake*yaml
|
||||
*/**/fake.py
|
||||
*/neural_compressor/model/nets_factory.py
|
||||
*/neural_compressor/benchmark.py
|
||||
*/neural_compressor/experimental/benchmark.py
|
||||
*/neural_compressor/contrib/strategy/tpe.py
|
||||
*/intel_extension_for_transformers/backends/*
|
||||
*/intel_extension_for_transformers/optimization/utils/get_throughput.py
|
||||
*/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/fuse_decomposed_in.py
|
||||
*/neural_compressor/adaptor/tf_utils/quantize_graph/qdq/fuse_qdq_in.py
|
||||
*/neural_compressor/adaptor/tf_utils/graph_rewriter/int8/freeze_value.py
|
||||
*/neural_compressor/template/*
|
||||
*/neural_compressor/common/*
|
||||
*/neural_compressor/torch/*
|
||||
*/neural_compressor/tensorflow/*
|
||||
exclude_lines =
|
||||
pragma: no cover
|
||||
raise NotImplementedError
|
||||
raise TypeError
|
||||
if self.device == "gpu":
|
||||
if device == "gpu":
|
||||
except ImportError:
|
||||
except Exception as e:
|
||||
onnx_version < ONNX18_VERSION
|
||||
onnx_version >= ONNX18_VERSION
|
@@ -0,0 +1,116 @@
|
||||
#!/bin/bash
|
||||
set -x
|
||||
|
||||
echo "copy pre-train model..."
|
||||
mkdir -p /tmp/.neural_compressor/inc_ut || true
|
||||
cp -r /tf_dataset/ut-localfile/resnet_v2 /tmp/.neural_compressor/inc_ut || true
|
||||
mkdir -p ~/.keras/datasets || true
|
||||
cp -r /tf_dataset/ut-localfile/cifar-10-batches-py* ~/.keras/datasets || true
|
||||
ls -l ~/.keras/datasets
|
||||
|
||||
echo "install dependencies..."
|
||||
echo "tensorflow version is $tensorflow_version"
|
||||
echo "itex version is $itex_version"
|
||||
echo "pytorch version is $pytorch_version"
|
||||
echo "torchvision version is $torchvision_version"
|
||||
echo "ipex version is $ipex_version"
|
||||
echo "onnx version is $onnx_version"
|
||||
echo "onnxruntime version is $onnxruntime_version"
|
||||
echo "mxnet version is $mxnet_version"
|
||||
|
||||
test_case=$1
|
||||
echo -e "##[group]test case is ${test_case}"
|
||||
|
||||
if [[ "${tensorflow_version}" == *"-official" ]]; then
|
||||
pip install tensorflow==${tensorflow_version%-official}
|
||||
elif [[ "${tensorflow_version}" == "spr-base" ]]; then
|
||||
pip install /tf_dataset/tf_binary/230928/tensorflow*.whl
|
||||
pip install cmake
|
||||
pip install protobuf==3.20.3
|
||||
pip install horovod==0.27.0
|
||||
if [[ $? -ne 0 ]]; then
|
||||
exit 1
|
||||
fi
|
||||
elif [[ "${tensorflow_version}" != "" ]]; then
|
||||
pip install intel-tensorflow==${tensorflow_version}
|
||||
fi
|
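# Illustrative: the "-official" suffix only selects stock TensorFlow and is
# stripped before install via bash suffix removal, e.g.
#   tensorflow_version="2.15.0-official"
#   pip install tensorflow==${tensorflow_version%-official}   # installs 2.15.0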
||||
|
||||
if [[ "${itex_version}" != "" ]]; then
|
||||
pip install --upgrade intel-extension-for-tensorflow[cpu]==${itex_version}
|
||||
pip install tf2onnx
|
||||
fi
|
||||
|
||||
if [[ "${pytorch_version}" != "" ]]; then
|
||||
pip install torch==${pytorch_version} -f https://download.pytorch.org/whl/torch_stable.html
|
||||
fi
|
||||
|
||||
if [[ "${torchvision_version}" != "" ]]; then
|
||||
pip install torchvision==${torchvision_version} -f https://download.pytorch.org/whl/torch_stable.html
|
||||
fi
|
||||
|
||||
if [[ "${ipex_version}" != "" ]]; then
|
||||
pip install intel-extension-for-pytorch=="${ipex_version%+cpu}"
|
||||
fi
|
||||
|
||||
if [[ "${onnx_version}" != "" ]]; then
|
||||
pip install onnx==${onnx_version}
|
||||
fi
|
||||
|
||||
if [[ "${onnxruntime_version}" != "" ]]; then
|
||||
pip install onnxruntime==${onnxruntime_version}
|
||||
if [[ "${onnxruntime_version}" == "1.14"* ]]; then
|
||||
pip install onnxruntime-extensions==0.8.0
|
||||
else
|
||||
pip install onnxruntime-extensions
|
||||
fi
|
||||
pip install optimum
|
||||
fi
|
||||
|
||||
if [ "${mxnet_version}" != '' ]; then
|
||||
pip install numpy==1.23.5
|
||||
echo "re-install pycocotools resolve the issue with numpy..."
|
||||
pip uninstall pycocotools -y
|
||||
pip install --no-cache-dir pycocotools
|
||||
pip install mxnet==${mxnet_version}
|
||||
fi
|
||||
|
||||
# install special test env requirements
|
||||
# common deps
|
||||
pip install cmake
|
||||
pip install transformers
|
||||
|
||||
if [[ $(echo "${test_case}" | grep -c "others") != 0 ]];then
|
||||
pip install tf_slim xgboost accelerate==0.21.0 peft
|
||||
elif [[ $(echo "${test_case}" | grep -c "nas") != 0 ]]; then
|
||||
pip install dynast==1.6.0rc1
|
||||
elif [[ $(echo "${test_case}" | grep -c "tf pruning") != 0 ]]; then
|
||||
pip install tensorflow-addons
|
||||
# Workaround
|
||||
# horovod can't be installed in an env with TF and PT together
|
||||
# so distributed cases are tested in an env with a single framework installed
|
||||
pip install horovod
|
||||
fi
|
||||
|
||||
if [[ $(echo "${test_case}" | grep -c "api") != 0 ]] || [[ $(echo "${test_case}" | grep -c "adaptor") != 0 ]]; then
|
||||
pip install auto-round
|
||||
fi
|
||||
|
||||
# test deps
|
||||
pip install coverage
|
||||
pip install pytest
|
||||
pip install pytest-html
|
||||
|
||||
echo "##[endgroup]"
|
||||
|
||||
pip list
|
||||
echo "[DEBUG] list pipdeptree..."
|
||||
pip install pipdeptree
|
||||
pipdeptree
|
||||
|
||||
# import torch before import tensorflow
|
||||
if [[ $(echo "${test_case}" | grep -c "run basic api") != 0 ]] || [[ $(echo "${test_case}" | grep -c "run basic others") != 0 ]] || [[ $(echo "${test_case}" | grep -c "run basic adaptor") != 0 ]]; then
|
||||
cd /neural-compressor/test || exit 1
|
||||
find . -name "test*.py" | xargs sed -i 's/import tensorflow as tf/import torch; import tensorflow as tf/g'
|
||||
find . -name "test*.py" | xargs sed -i 's/import tensorflow.compat.v1 as tf/import torch; import tensorflow.compat.v1 as tf/g'
|
||||
find . -name "test*.py" | xargs sed -i 's/from tensorflow import keras/import torch; from tensorflow import keras/g'
|
||||
fi
|
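# Illustrative effect of the sed rewrites above on a test file (forcing torch
# to be imported ahead of TensorFlow in the affected suites):
#   before: import tensorflow as tf
#   after:  import torch; import tensorflow as tf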
@@ -0,0 +1,35 @@
|
||||
#!/bin/bash
|
||||
python -c "import neural_compressor as nc;print(nc.version.__version__)"
|
||||
test_case="run basic adaptor"
|
||||
echo "${test_case}"
|
||||
|
||||
echo "specify fwk version..."
|
||||
source /neural-compressor/.azure-pipelines/scripts/fwk_version.sh $1
|
||||
|
||||
echo "set up UT env..."
|
||||
bash /neural-compressor/.azure-pipelines/scripts/ut/env_setup.sh "${test_case}"
|
||||
export LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH
|
||||
export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/coverage.file
|
||||
lpot_path=$(python -c 'import neural_compressor; import os; print(os.path.dirname(neural_compressor.__file__))')
|
||||
cd /neural-compressor/test || exit 1
|
||||
find ./adaptor -name "test*.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
|
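# Illustrative line the rewrite above would put in run.sh (test file name is
# hypothetical; ${lpot_path} is expanded at generation time):
#   coverage run --source=<expanded lpot_path> --append adaptor/test_example.py --verbose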
||||
|
||||
LOG_DIR=/neural-compressor/log_dir
|
||||
mkdir -p ${LOG_DIR}
|
||||
ut_log_name=${LOG_DIR}/ut_adaptor.log
|
||||
|
||||
echo "cat run.sh..."
|
||||
sort run.sh -o run.sh
|
||||
cat run.sh | tee ${ut_log_name}
|
||||
echo "------UT start-------"
|
||||
bash -x run.sh 2>&1 | tee -a ${ut_log_name}
|
||||
echo "------UT end -------"
|
||||
|
||||
if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
|
||||
echo "Find errors in UT test, please check the output..."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cp .coverage ${LOG_DIR}/.coverage.adaptor
|
||||
|
||||
echo "UT finished successfully! "
|
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash
|
||||
python -c "import neural_compressor as nc;print(nc.version.__version__)"
|
||||
test_case="run basic tfnewapi"
|
||||
echo "${test_case}"
|
||||
|
||||
echo "specify fwk version..."
|
||||
export tensorflow_version='spr-base'
|
||||
export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/coverage.file
|
||||
# export FORCE_BF16=1
|
||||
|
||||
echo "set up UT env..."
|
||||
bash /neural-compressor/.azure-pipelines/scripts/ut/env_setup.sh "${test_case}"
|
||||
lpot_path=$(python -c 'import neural_compressor; import os; print(os.path.dirname(neural_compressor.__file__))')
|
||||
cd /neural-compressor/test || exit 1
|
||||
find ./tfnewapi -name "test*.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
|
||||
|
||||
LOG_DIR=/neural-compressor/log_dir
|
||||
mkdir -p ${LOG_DIR}
|
||||
ut_log_name=${LOG_DIR}/ut_tf_newapi.log
|
||||
|
||||
echo "cat run.sh..."
|
||||
sort run.sh -o run.sh
|
||||
cat run.sh | tee ${ut_log_name}
|
||||
echo "------UT start-------"
|
||||
bash -x run.sh 2>&1 | tee -a ${ut_log_name}
|
||||
echo "------UT end -------"
|
||||
|
||||
if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
|
||||
echo "Find errors in UT test, please check the output..."
|
||||
exit 1
|
||||
fi
|
||||
cp .coverage ${LOG_DIR}/.coverage.tfnewapi
|
||||
echo "UT finished successfully! "
|
@@ -0,0 +1,38 @@
|
||||
#!/bin/bash
|
||||
python -c "import neural_compressor as nc;print(nc.version.__version__)"
|
||||
test_case="run basic api quantization/benchmark/export/mixed_precision/distillation/scheduler/nas"
|
||||
echo "${test_case}"
|
||||
|
||||
echo "specify fwk version..."
|
||||
source /neural-compressor/.azure-pipelines/scripts/fwk_version.sh $1
|
||||
|
||||
echo "set up UT env..."
|
||||
bash /neural-compressor/.azure-pipelines/scripts/ut/env_setup.sh "${test_case}"
|
||||
export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/coverage.file
|
||||
lpot_path=$(python -c 'import neural_compressor; import os; print(os.path.dirname(neural_compressor.__file__))')
|
||||
cd /neural-compressor/test || exit 1
|
||||
find ./quantization* -name "test*.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
|
||||
find ./benchmark* -name "test*.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh
|
||||
find ./export* -name "test*.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh
|
||||
find ./mixed_precision* -name "test*.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh
|
||||
find ./distillation -name "test*.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh
|
||||
find ./scheduler -name "test*.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh
|
||||
find ./nas -name "test*.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh
|
||||
|
||||
LOG_DIR=/neural-compressor/log_dir
|
||||
mkdir -p ${LOG_DIR}
|
||||
ut_log_name=${LOG_DIR}/ut_api.log
|
||||
|
||||
echo "cat run.sh..."
|
||||
sort run.sh -o run.sh
|
||||
cat run.sh | tee ${ut_log_name}
|
||||
echo "------UT start-------"
|
||||
bash -x run.sh 2>&1 | tee -a ${ut_log_name}
|
||||
echo "------UT end -------"
|
||||
|
||||
if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
|
||||
echo "Find errors in UT test, please check the output..."
|
||||
exit 1
|
||||
fi
|
||||
cp .coverage ${LOG_DIR}/.coverage.api
|
||||
echo "UT finished successfully! "
|
@@ -0,0 +1,35 @@
|
||||
#!/bin/bash
|
||||
python -c "import neural_compressor as nc;print(nc.version.__version__)"
|
||||
test_case="run basic itex"
|
||||
echo "${test_case}"
|
||||
|
||||
echo "specify fwk version..."
|
||||
export itex_version='2.15.0.0'
|
||||
export tensorflow_version='2.15.0-official'
|
||||
export onnx_version='1.16.0'
|
||||
export onnxruntime_version='1.18.0'
|
||||
|
||||
echo "set up UT env..."
|
||||
bash /neural-compressor/.azure-pipelines/scripts/ut/env_setup.sh "${test_case}"
|
||||
export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/coverage.file
|
||||
lpot_path=$(python -c 'import neural_compressor; import os; print(os.path.dirname(neural_compressor.__file__))')
|
||||
cd /neural-compressor/test || exit 1
|
||||
find ./itex -name "test*.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
|
||||
|
||||
LOG_DIR=/neural-compressor/log_dir
|
||||
mkdir -p ${LOG_DIR}
|
||||
ut_log_name=${LOG_DIR}/ut_itex.log
|
||||
|
||||
echo "cat run.sh..."
|
||||
sort run.sh -o run.sh
|
||||
cat run.sh | tee ${ut_log_name}
|
||||
echo "------UT start-------"
|
||||
bash -x run.sh 2>&1 | tee -a ${ut_log_name}
|
||||
echo "------UT end -------"
|
||||
|
||||
if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
|
||||
echo "Find errors in UT test, please check the output..."
|
||||
exit 1
|
||||
fi
|
||||
cp .coverage ${LOG_DIR}/.coverage.itex
|
||||
echo "UT finished successfully! "
|
@@ -0,0 +1,50 @@
|
||||
#!/bin/bash
|
||||
python -c "import neural_compressor as nc;print(nc.version.__version__)"
|
||||
test_case="run basic others"
|
||||
echo "${test_case}"
|
||||
|
||||
echo "specify fwk version..."
|
||||
source /neural-compressor/.azure-pipelines/scripts/fwk_version.sh $1
|
||||
|
||||
echo "set up UT env..."
|
||||
bash /neural-compressor/.azure-pipelines/scripts/ut/env_setup.sh "${test_case}"
|
||||
export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/coverage.file
|
||||
lpot_path=$(python -c 'import neural_compressor; import os; print(os.path.dirname(neural_compressor.__file__))')
|
||||
cd /neural-compressor/test || exit 1
|
||||
find . -name "test*.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
|
||||
sed -i '/ adaptor\//d' run.sh
|
||||
sed -i '/ tfnewapi\//d' run.sh
|
||||
sed -i '/ itex\//d' run.sh
|
||||
sed -i '/ pruning_with_pt/d' run.sh
|
||||
sed -i '/ pruning_with_tf/d' run.sh
|
||||
sed -i '/ quantization/d' run.sh
|
||||
sed -i '/ benchmark/d' run.sh
|
||||
sed -i '/ export/d' run.sh
|
||||
sed -i '/ mixed_precision/d' run.sh
|
||||
sed -i '/ distillation\//d' run.sh
|
||||
sed -i '/ scheduler\//d' run.sh
|
||||
sed -i '/ nas\//d' run.sh
|
||||
sed -i '/ 3x\//d' run.sh
|
||||
sed -i '/ distributed\//d' run.sh
|
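# Illustrative: each filter above drops the generated commands whose test path
# matches that directory (those suites are exercised by their own dedicated
# jobs); e.g. a run.sh line ending in " adaptor/test_example.py --verbose" is
# removed by the '/ adaptor\//d' expression.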
||||
|
||||
echo "copy model for dynas..."
|
||||
mkdir -p .torch/ofa_nets || true
|
||||
cp -r /tf_dataset/ut-localfile/ofa_mbv3_d234_e346_k357_w1.2 .torch/ofa_nets || true
|
||||
|
||||
LOG_DIR=/neural-compressor/log_dir
|
||||
mkdir -p ${LOG_DIR}
|
||||
ut_log_name=${LOG_DIR}/ut_others.log
|
||||
|
||||
echo "cat run.sh..."
|
||||
sort run.sh -o run.sh
|
||||
cat run.sh | tee ${ut_log_name}
|
||||
echo "------UT start-------"
|
||||
bash -x run.sh 2>&1 | tee -a ${ut_log_name}
|
||||
echo "------UT end -------"
|
||||
|
||||
if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
|
||||
echo "Find errors in UT test, please check the output..."
|
||||
exit 1
|
||||
fi
|
||||
cp .coverage ${LOG_DIR}/.coverage.others
|
||||
echo "UT finished successfully! "
|
@@ -0,0 +1,35 @@
|
||||
#!/bin/bash
|
||||
python -c "import neural_compressor as nc;print(nc.version.__version__)"
|
||||
test_case="run basic pt pruning"
|
||||
echo "${test_case}"
|
||||
|
||||
echo "specify fwk version..."
|
||||
export pytorch_version='2.4.0+cpu'
|
||||
export torchvision_version='0.18.0+cpu'
|
||||
export ipex_version='2.4.0+cpu'
|
||||
|
||||
echo "set up UT env..."
|
||||
bash /neural-compressor/.azure-pipelines/scripts/ut/env_setup.sh "${test_case}"
|
||||
export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/coverage.file
|
||||
lpot_path=$(python -c 'import neural_compressor; import os; print(os.path.dirname(neural_compressor.__file__))')
|
||||
cd /neural-compressor/test || exit 1
|
||||
find ./pruning_with_pt -name "test*.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
|
||||
# find ./distributed -name "test_distributed_pt_train.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh
|
||||
|
||||
LOG_DIR=/neural-compressor/log_dir
|
||||
mkdir -p ${LOG_DIR}
|
||||
ut_log_name=${LOG_DIR}/ut_pt_pruning.log
|
||||
|
||||
echo "cat run.sh..."
|
||||
sort run.sh -o run.sh
|
||||
cat run.sh | tee ${ut_log_name}
|
||||
echo "------UT start-------"
|
||||
bash -x run.sh 2>&1 | tee -a ${ut_log_name}
|
||||
echo "------UT end -------"
|
||||
|
||||
if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
|
||||
echo "Find errors in UT test, please check the output..."
|
||||
exit 1
|
||||
fi
|
||||
cp .coverage ${LOG_DIR}/.coverage.pt_pruning
|
||||
echo "UT finished successfully! "
|
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash
|
||||
python -c "import neural_compressor as nc;print(nc.version.__version__)"
|
||||
test_case="run basic tf pruning"
|
||||
echo "${test_case}"
|
||||
|
||||
echo "specify fwk version..."
|
||||
export tensorflow_version='2.14.0'
|
||||
|
||||
echo "set up UT env..."
|
||||
bash /neural-compressor/.azure-pipelines/scripts/ut/env_setup.sh "${test_case}"
|
||||
export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/coverage.file
|
||||
lpot_path=$(python -c 'import neural_compressor; import os; print(os.path.dirname(neural_compressor.__file__))')
|
||||
cd /neural-compressor/test || exit 1
|
||||
find ./pruning_with_tf -name "test*.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'> run.sh
|
||||
find ./distributed -name "test_distributed_tf_dataloader.py" | sed 's,\.\/,coverage run --source='"${lpot_path}"' --append ,g' | sed 's/$/ --verbose/'>> run.sh
|
||||
|
||||
LOG_DIR=/neural-compressor/log_dir
|
||||
mkdir -p ${LOG_DIR}
|
||||
ut_log_name=${LOG_DIR}/ut_tf_pruning.log
|
||||
|
||||
echo "cat run.sh..."
|
||||
sort run.sh -o run.sh
|
||||
cat run.sh | tee ${ut_log_name}
|
||||
echo "------UT start-------"
|
||||
bash -x run.sh 2>&1 | tee -a ${ut_log_name}
|
||||
echo "------UT end -------"
|
||||
|
||||
if [ $(grep -c "FAILED" ${ut_log_name}) != 0 ] || [ $(grep -c "core dumped" ${ut_log_name}) != 0 ] || [ $(grep -c "ModuleNotFoundError:" ${ut_log_name}) != 0 ] || [ $(grep -c "OK" ${ut_log_name}) == 0 ];then
|
||||
echo "Find errors in UT test, please check the output..."
|
||||
exit 1
|
||||
fi
|
||||
cp .coverage ${LOG_DIR}/.coverage.tf_pruning
|
||||
echo "UT finished successfully! "
|
@@ -0,0 +1,42 @@
|
||||
parameters:
|
||||
- name: codeScanFileName
|
||||
type: string
|
||||
- name: uploadPath
|
||||
type: string
|
||||
|
||||
- name: codeScanContainerName
|
||||
type: string
|
||||
default: "codeScan"
|
||||
- name: scanModule
|
||||
type: string
|
||||
default: "neural_compressor"
|
||||
|
||||
steps:
|
||||
- template: docker-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "commonDockerConfig"
|
||||
repoName: "code-scan"
|
||||
repoTag: "1.0"
|
||||
dockerFileName: "DockerfileCodeScan"
|
||||
containerName: ${{ parameters.codeScanContainerName }}
|
||||
|
||||
- script: |
|
||||
docker exec ${{ parameters.codeScanContainerName }} bash -c "bash /neural-compressor/.azure-pipelines/scripts/codeScan/${{ parameters.codeScanFileName }}/${{ parameters.codeScanFileName }}.sh \
|
||||
--scan_module=${{ parameters.scanModule }}"
|
||||
displayName: "${{ parameters.codeScanFileName }} Check"
|
||||
|
||||
- task: PublishPipelineArtifact@1
|
||||
condition: succeededOrFailed()
|
||||
inputs:
|
||||
targetPath: .azure-pipelines/scripts/codeScan/scanLog/${{ parameters.uploadPath }}
|
||||
artifact: $(System.JobAttempt)_${{ parameters.codeScanFileName }}
|
||||
publishLocation: "pipeline"
|
||||
displayName: "PublishPipelineArtifact"
|
||||
|
||||
- task: Bash@3
|
||||
condition: always()
|
||||
inputs:
|
||||
targetType: "inline"
|
||||
script: |
|
||||
docker exec ${{ parameters.codeScanContainerName }} bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true"
|
||||
displayName: "Docker clean up"
|
@@ -0,0 +1,103 @@
|
||||
parameters:
|
||||
- name: dockerConfigName
|
||||
type: string
|
||||
default: "commonDockerConfig"
|
||||
- name: repoName
|
||||
type: string
|
||||
default: "neural-compressor"
|
||||
- name: repoTag
|
||||
type: string
|
||||
default: "py310"
|
||||
- name: dockerFileName
|
||||
type: string
|
||||
default: "Dockerfile"
|
||||
- name: containerName
|
||||
type: string
|
||||
- name: repo
|
||||
type: string
|
||||
default: "https://github.com/intel/neural-compressor"
|
||||
- name: imageSource
|
||||
type: string
|
||||
default: "build"
|
||||
|
||||
steps:
|
||||
- task: Bash@3
|
||||
inputs:
|
||||
targetType: "inline"
|
||||
script: |
|
||||
docker ps -a
|
||||
if [[ $(docker ps -a | grep -i '${{ parameters.containerName }}'$) ]]; then
|
||||
docker start $(docker ps -aq --filter "name=${{ parameters.containerName }}")
|
||||
echo "remove left files through container ..."
|
||||
docker exec ${{ parameters.containerName }} bash -c "ls -a /neural-compressor && rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* && ls -a /neural-compressor || true"
|
||||
fi
|
||||
displayName: "Docker workspace clean up"
|
||||
|
||||
- ${{ if eq(parameters.dockerConfigName, 'commonDockerConfig') }}:
|
||||
- script: |
|
||||
rm -fr ${BUILD_SOURCESDIRECTORY} || sudo rm -fr ${BUILD_SOURCESDIRECTORY} || true
|
||||
displayName: "Clean workspace"
|
||||
|
||||
- checkout: self
|
||||
clean: true
|
||||
displayName: "Checkout out Repo"
|
||||
fetchDepth: 0
|
||||
|
||||
- ${{ if eq(parameters.dockerConfigName, 'gitCloneDockerConfig') }}:
|
||||
- script: |
|
||||
rm -fr ${BUILD_SOURCESDIRECTORY} || sudo rm -fr ${BUILD_SOURCESDIRECTORY} || true
|
||||
mkdir ${BUILD_SOURCESDIRECTORY}
|
||||
chmod 777 ${BUILD_SOURCESDIRECTORY}
|
||||
displayName: "Clean workspace"
|
||||
|
||||
- checkout: none
|
||||
|
||||
- script: |
|
||||
git clone ${{ parameters.repo }} ${BUILD_SOURCESDIRECTORY}
|
||||
git config --global --add safe.directory ${BUILD_SOURCESDIRECTORY}
|
||||
cd ${BUILD_SOURCESDIRECTORY}
|
||||
git checkout master
|
||||
displayName: "Checkout out master"
|
||||
|
||||
- ${{ if eq(parameters.imageSource, 'build') }}:
|
||||
- script: |
|
||||
docker image prune -a -f
|
||||
if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then
|
||||
docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} .
|
||||
fi
|
||||
docker images | grep -i ${{ parameters.repoName }}
|
||||
if [[ $? -ne 0 ]]; then
|
||||
echo "NO Such Repo"
|
||||
exit 1
|
||||
fi
|
||||
displayName: "Build develop docker image"
|
||||
|
||||
- ${{ if eq(parameters.imageSource, 'pull') }}:
|
||||
- script: |
|
||||
docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
|
||||
displayName: "Pull habana docker image"
|
||||
|
||||
- script: |
|
||||
docker stop $(docker ps -aq --filter "name=${{ parameters.containerName }}")
|
||||
docker rm -vf ${{ parameters.containerName }} || true
|
||||
env | sort
|
||||
displayName: "Clean docker container"
|
||||
|
||||
- ${{ if ne(parameters.containerName, '') }}:
|
||||
- task: Bash@3
|
||||
inputs:
|
||||
targetType: "inline"
|
||||
script: |
|
||||
if [[ "${{ parameters.imageSource }}" == "build" ]]; then
|
||||
docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
|
||||
-v ${BUILD_SOURCESDIRECTORY}:/neural-compressor -v /tf_dataset:/tf_dataset -v /tf_dataset2:/tf_dataset2 \
|
||||
${{ parameters.repoName }}:${{ parameters.repoTag }}
|
||||
else
|
||||
docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
|
||||
--runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host \
|
||||
-v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
|
||||
docker exec ${{ parameters.containerName }} bash -c "ln -sf \$(which python3) /usr/bin/python"
|
||||
fi
|
||||
echo "Show the container list after docker run ... "
|
||||
docker ps -a
|
||||
displayName: "Docker run - ${{ parameters.containerName }} Container"
|
@@ -0,0 +1,80 @@
|
||||
parameters:
|
||||
- name: modelName
|
||||
type: string
|
||||
default: "resnet50v1.5"
|
||||
- name: framework
|
||||
type: string
|
||||
default: "tensorflow"
|
||||
- name: APIVersion
|
||||
type: string
|
||||
default: ""
|
||||
|
||||
- name: modelContainerName
|
||||
type: string
|
||||
default: "model"
|
||||
|
||||
steps:
|
||||
- template: docker-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "commonDockerConfig"
|
||||
repoName: "neural-compressor"
|
||||
repoTag: "py310"
|
||||
dockerFileName: "Dockerfile"
|
||||
containerName: ${{ parameters.modelContainerName }}
|
||||
|
||||
- script: |
|
||||
docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \
|
||||
&& bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='env_setup'"
|
||||
displayName: Env setup
|
||||
|
||||
- task: DownloadPipelineArtifact@2
|
||||
continueOnError: true
|
||||
inputs:
|
||||
source: "specific"
|
||||
artifact: ${{ parameters.framework }}_${{ parameters.modelName }}
|
||||
patterns: "**_summary.log"
|
||||
path: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models/${{ parameters.modelName }}_refer_log
|
||||
project: $(System.TeamProject)
|
||||
pipeline: "Model-Test"
|
||||
runVersion: "specific"
|
||||
runId: $(refer_buildId)
|
||||
retryDownloadCount: 3
|
||||
displayName: "Download refer logs"
|
||||
|
||||
- script: |
|
||||
docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \
|
||||
&& bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='tuning'"
|
||||
displayName: Quantization
|
||||
|
||||
- ${{ if ne(parameters.APIVersion, '3x') }}:
|
||||
- script: |
|
||||
docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \
|
||||
&& bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='int8_benchmark' --USE_TUNE_ACC=$(USE_TUNE_ACC) --PERF_STABLE_CHECK=$(PERF_STABLE_CHECK)"
|
||||
displayName: INT8 Benchmark
|
||||
|
||||
- script: |
|
||||
docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \
|
||||
&& bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='fp32_benchmark' --USE_TUNE_ACC=$(USE_TUNE_ACC) --PERF_STABLE_CHECK=$(PERF_STABLE_CHECK)"
|
||||
displayName: FP32 Benchmark
|
||||
|
||||
- task: Bash@3
|
||||
inputs:
|
||||
targetType: "inline"
|
||||
script: |
|
||||
docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \
|
||||
&& bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='collect_log' --BUILD_BUILDID=$(Build.BuildId)"
|
||||
displayName: Collect log
|
||||
|
||||
- task: PublishPipelineArtifact@1
|
||||
inputs:
|
||||
targetPath: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models/${{ parameters.modelName }}/
|
||||
artifact: ${{ parameters.framework }}_${{ parameters.modelName }}
|
||||
publishLocation: "pipeline"
|
||||
|
||||
- task: Bash@3
|
||||
condition: always()
|
||||
inputs:
|
||||
targetType: "inline"
|
||||
script: |
|
||||
docker exec ${{ parameters.modelContainerName }} bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true"
|
||||
displayName: "Docker clean up"
|
@@ -0,0 +1,61 @@
|
||||
parameters:
|
||||
- name: dockerConfigName
|
||||
type: string
|
||||
default: "commonDockerConfig"
|
||||
- name: repo
|
||||
type: string
|
||||
default: "https://github.com/intel/neural-compressor"
|
||||
- name: utScriptFileName
|
||||
type: string
|
||||
- name: uploadPath
|
||||
type: string
|
||||
- name: utArtifact
|
||||
type: string
|
||||
- name: utTestMode
|
||||
type: string
|
||||
default: "coverage"
|
||||
- name: utContainerName
|
||||
type: string
|
||||
default: "utTest"
|
||||
- name: imageSource
|
||||
type: string
|
||||
default: "build"
|
||||
|
||||
steps:
|
||||
- template: docker-template.yml
|
||||
parameters:
|
||||
dockerConfigName: ${{ parameters.dockerConfigName }}
|
||||
repoName: "neural-compressor"
|
||||
repoTag: "py310"
|
||||
dockerFileName: "Dockerfile"
|
||||
containerName: ${{ parameters.utContainerName }}
|
||||
repo: ${{ parameters.repo }}
|
||||
imageSource: ${{ parameters.imageSource }}
|
||||
|
||||
- script: |
|
||||
docker exec ${{ parameters.utContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts \
|
||||
&& bash install_nc.sh ${{ parameters.utScriptFileName }} \
|
||||
&& bash ut/${{ parameters.utScriptFileName }}.sh ${{ parameters.utTestMode }}"
|
||||
displayName: "Run UT"
|
||||
|
||||
- task: PublishPipelineArtifact@1
|
||||
condition: succeededOrFailed()
|
||||
inputs:
|
||||
targetPath: ${{ parameters.uploadPath }}
|
||||
artifact: $(System.JobAttempt)_${{ parameters.utArtifact }}_report
|
||||
publishLocation: "pipeline"
|
||||
|
||||
- ${{ if eq(parameters.utTestMode, 'coverage') }}:
|
||||
- task: PublishPipelineArtifact@1
|
||||
inputs:
|
||||
targetPath: ${{ parameters.uploadPath }}
|
||||
artifact: ${{ parameters.utArtifact }}_coverage
|
||||
publishLocation: "pipeline"
|
||||
|
||||
- task: Bash@3
|
||||
condition: always()
|
||||
inputs:
|
||||
targetType: "inline"
|
||||
script: |
|
||||
docker exec ${{ parameters.utContainerName }} bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true"
|
||||
displayName: "Docker clean up"
|
@@ -0,0 +1,118 @@
|
||||
trigger: none
|
||||
|
||||
pr:
|
||||
autoCancel: true
|
||||
drafts: false
|
||||
branches:
|
||||
include:
|
||||
- master
|
||||
paths:
|
||||
include:
|
||||
- .azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh
|
||||
- .azure-pipelines/scripts/install_nc.sh
|
||||
- .azure-pipelines/ut-3x-pt-fp8.yml
|
||||
- .azure-pipelines/template/docker-template.yml
|
||||
- neural_compressor/common
|
||||
- neural_compressor/torch
|
||||
- test/3x/torch/algorithms/fp8_quant
|
||||
- test/3x/torch/quantization/fp8_quant
|
||||
- test/3x/torch/quantization/weight_only/test_rtn.py
|
||||
- test/3x/torch/quantization/weight_only/test_load.py
|
||||
- setup.py
|
||||
- requirements_pt.txt
|
||||
|
||||
pool: GAUDI
|
||||
|
||||
variables:
|
||||
IMAGE_NAME: "neural-compressor"
|
||||
IMAGE_TAG: "py310"
|
||||
UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir
|
||||
DOWNLOAD_PATH: $(Build.SourcesDirectory)/log_dir
|
||||
ARTIFACT_NAME: "UT_coverage_report_3x_pt_fp8"
|
||||
REPO: $(Build.Repository.Uri)
|
||||
|
||||
stages:
|
||||
- stage: Torch_habana
|
||||
displayName: Torch 3x Habana FP8
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Torch 3x Habana FP8
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
imageSource: "pull"
|
||||
dockerConfigName: "commonDockerConfig"
|
||||
utScriptFileName: "3x/run_3x_pt_fp8"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut_3x"
|
||||
|
||||
- stage: Torch_habana_baseline
|
||||
displayName: Torch 3x Habana FP8 baseline
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Torch 3x Habana FP8 baseline
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
imageSource: "pull"
|
||||
dockerConfigName: "gitCloneDockerConfig"
|
||||
utScriptFileName: "3x/run_3x_pt_fp8"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut_3x_baseline"
|
||||
|
||||
- stage: Coverage
|
||||
displayName: "Coverage Compare"
|
||||
pool:
|
||||
vmImage: "ubuntu-latest"
|
||||
dependsOn: [Torch_habana, Torch_habana_baseline]
|
||||
jobs:
|
||||
- job: CollectDatafiles
|
||||
steps:
|
||||
- script: |
|
||||
if [[ ! $(docker images | grep -i ${IMAGE_NAME}:${IMAGE_TAG}) ]]; then
|
||||
docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/Dockerfile.devel -t ${IMAGE_NAME}:${IMAGE_TAG} .
|
||||
fi
|
||||
docker images | grep -i ${IMAGE_NAME}
|
||||
if [[ $? -ne 0 ]]; then
|
||||
echo "NO Such Repo"
|
||||
exit 1
|
||||
fi
|
||||
displayName: "Build develop docker image"
|
||||
|
||||
- task: DownloadPipelineArtifact@2
|
||||
inputs:
|
||||
artifact:
|
||||
patterns: '*_coverage/.coverage'
|
||||
path: $(DOWNLOAD_PATH)
|
||||
|
||||
- script: |
|
||||
echo "--- create container ---"
|
||||
docker run -d -it --name="collectLogs" -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor ${IMAGE_NAME}:${IMAGE_TAG} /bin/bash
|
||||
echo "--- docker ps ---"
|
||||
docker ps
|
||||
echo "--- collect logs ---"
|
||||
docker exec collectLogs /bin/bash +x -c "cd /neural-compressor/.azure-pipelines/scripts \
|
||||
&& bash install_nc.sh 3x_pt_fp8 \
|
||||
&& bash ut/3x/collect_log_3x.sh 3x_pt_fp8"
|
||||
displayName: "Collect UT Coverage"
|
||||
|
||||
- task: PublishCodeCoverageResults@2
|
||||
inputs:
|
||||
summaryFileLocation: $(Build.SourcesDirectory)/log_dir/coverage_PR/coverage.xml
|
||||
|
||||
- task: PublishPipelineArtifact@1
|
||||
condition: succeededOrFailed()
|
||||
inputs:
|
||||
targetPath: $(UPLOAD_PATH)
|
||||
artifact: $(ARTIFACT_NAME)
|
||||
publishLocation: "pipeline"
|
||||
|
||||
- task: Bash@3
|
||||
condition: always()
|
||||
inputs:
|
||||
targetType: "inline"
|
||||
script: |
|
||||
docker exec collectLogs bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true"
|
||||
displayName: "Docker clean up"
|
@@ -0,0 +1,116 @@
|
||||
trigger: none
|
||||
|
||||
pr:
|
||||
autoCancel: true
|
||||
drafts: false
|
||||
branches:
|
||||
include:
|
||||
- master
|
||||
paths:
|
||||
include:
|
||||
- neural_compressor/common
|
||||
- neural_compressor/torch
|
||||
- test/3x/torch
|
||||
- test/3x/common
|
||||
- setup.py
|
||||
- requirements_pt.txt
|
||||
- .azure-pipelines/ut-3x-pt.yml
|
||||
- .azure-pipelines/template/docker-template.yml
|
||||
- .azure-pipelines/scripts/install_nc.sh
|
||||
- .azure-pipelines/scripts/ut/3x/run_3x_pt.sh
|
||||
|
||||
pool: ICX-16C
|
||||
|
||||
variables:
|
||||
IMAGE_NAME: "neural-compressor"
|
||||
IMAGE_TAG: "py310"
|
||||
UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir
|
||||
DOWNLOAD_PATH: $(Build.SourcesDirectory)/log_dir
|
||||
ARTIFACT_NAME: "UT_coverage_report_3x_pt"
|
||||
REPO: $(Build.Repository.Uri)
|
||||
|
||||
stages:
|
||||
- stage: Torch
|
||||
displayName: Unit Test 3x Torch
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Unit Test 3x Torch
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "commonDockerConfig"
|
||||
utScriptFileName: "3x/run_3x_pt"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut_3x"
|
||||
|
||||
|
||||
- stage: Torch_baseline
|
||||
displayName: Unit Test 3x Torch baseline
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Unit Test 3x Torch baseline
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "gitCloneDockerConfig"
|
||||
utScriptFileName: "3x/run_3x_pt"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut_3x_baseline"
|
||||
repo: $(REPO)
|
||||
|
||||
- stage: Coverage
|
||||
displayName: "Coverage Compare"
|
||||
pool:
|
||||
vmImage: "ubuntu-latest"
|
||||
dependsOn: [Torch, Torch_baseline]
|
||||
jobs:
|
||||
- job: CollectDatafiles
|
||||
steps:
|
||||
- script: |
|
||||
if [[ ! $(docker images | grep -i ${IMAGE_NAME}:${IMAGE_TAG}) ]]; then
|
||||
docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/Dockerfile.devel -t ${IMAGE_NAME}:${IMAGE_TAG} .
|
||||
fi
|
||||
docker images | grep -i ${IMAGE_NAME}
|
||||
if [[ $? -ne 0 ]]; then
|
||||
echo "NO Such Repo"
|
||||
exit 1
|
||||
fi
|
||||
displayName: "Build develop docker image"
|
||||
|
||||
- task: DownloadPipelineArtifact@2
|
||||
inputs:
|
||||
artifact:
|
||||
patterns: '*_coverage/.coverage'
|
||||
path: $(DOWNLOAD_PATH)
|
||||
|
||||
- script: |
|
||||
echo "--- create container ---"
|
||||
docker run -d -it --name="collectLogs" -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor ${IMAGE_NAME}:${IMAGE_TAG} /bin/bash
|
||||
echo "--- docker ps ---"
|
||||
docker ps
|
||||
echo "--- collect logs ---"
|
||||
docker exec collectLogs /bin/bash +x -c "cd /neural-compressor/.azure-pipelines/scripts \
|
||||
&& bash install_nc.sh 3x_pt \
|
||||
&& bash ut/3x/collect_log_3x.sh 3x_pt"
|
||||
displayName: "Collect UT Coverage"
|
||||
|
||||
- task: PublishCodeCoverageResults@2
|
||||
inputs:
|
||||
summaryFileLocation: $(Build.SourcesDirectory)/log_dir/coverage_PR/coverage.xml
|
||||
|
||||
- task: PublishPipelineArtifact@1
|
||||
condition: succeededOrFailed()
|
||||
inputs:
|
||||
targetPath: $(UPLOAD_PATH)
|
||||
artifact: $(ARTIFACT_NAME)
|
||||
publishLocation: "pipeline"
|
||||
|
||||
- task: Bash@3
|
||||
condition: always()
|
||||
inputs:
|
||||
targetType: "inline"
|
||||
script: |
|
||||
docker exec collectLogs bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true"
|
||||
displayName: "Docker clean up"
|
@@ -0,0 +1,113 @@
|
||||
trigger: none
|
||||
|
||||
pr:
|
||||
autoCancel: true
|
||||
drafts: false
|
||||
branches:
|
||||
include:
|
||||
- master
|
||||
paths:
|
||||
include:
|
||||
- neural_compressor/common
|
||||
- neural_compressor/tensorflow
|
||||
- test/3x/tensorflow
|
||||
- test/3x/common
|
||||
- setup.py
|
||||
- requirements_tf.txt
|
||||
- .azure-pipelines/scripts/ut/3x/run_3x_tf.sh
|
||||
- .azure-pipelines/template/docker-template.yml
|
||||
|
||||
pool: ICX-16C
|
||||
|
||||
variables:
|
||||
IMAGE_NAME: "neural-compressor"
|
||||
IMAGE_TAG: "py310"
|
||||
UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir
|
||||
DOWNLOAD_PATH: $(Build.SourcesDirectory)/log_dir
|
||||
ARTIFACT_NAME: "UT_coverage_report_3x_tf"
|
||||
REPO: $(Build.Repository.Uri)
|
||||
|
||||
stages:
|
||||
- stage: TensorFlow
|
||||
displayName: Unit Test 3x TensorFlow
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Unit Test 3x TensorFlow
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "commonDockerConfig"
|
||||
utScriptFileName: "3x/run_3x_tf"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut_3x"
|
||||
|
||||
- stage: TensorFlow_baseline
|
||||
displayName: Unit Test 3x TensorFlow baseline
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Unit Test 3x TensorFlow baseline
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "gitCloneDockerConfig"
|
||||
utScriptFileName: "3x/run_3x_tf"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut_3x_baseline"
|
||||
repo: $(REPO)
|
||||
|
||||
- stage: Coverage
|
||||
displayName: "Coverage Compare"
|
||||
pool:
|
||||
vmImage: "ubuntu-latest"
|
||||
dependsOn: [TensorFlow, TensorFlow_baseline]
|
||||
jobs:
|
||||
- job: CollectDatafiles
|
||||
steps:
|
||||
- script: |
|
||||
if [[ ! $(docker images | grep -i ${IMAGE_NAME}:${IMAGE_TAG}) ]]; then
|
||||
docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/Dockerfile.devel -t ${IMAGE_NAME}:${IMAGE_TAG} .
|
||||
fi
|
||||
docker images | grep -i ${IMAGE_NAME}
|
||||
if [[ $? -ne 0 ]]; then
|
||||
echo "NO Such Repo"
|
||||
exit 1
|
||||
fi
|
||||
displayName: "Build develop docker image"
|
||||
|
||||
- task: DownloadPipelineArtifact@2
|
||||
inputs:
|
||||
artifact:
|
||||
patterns: '*_coverage/.coverage'
|
||||
path: $(DOWNLOAD_PATH)
|
||||
|
||||
- script: |
|
||||
echo "--- create container ---"
|
||||
docker run -d -it --name="collectLogs" -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor ${IMAGE_NAME}:${IMAGE_TAG} /bin/bash
|
||||
echo "--- docker ps ---"
|
||||
docker ps
|
||||
echo "--- collect logs ---"
|
||||
docker exec collectLogs /bin/bash +x -c "cd /neural-compressor/.azure-pipelines/scripts \
|
||||
&& bash install_nc.sh 3x_tf \
|
||||
&& bash ut/3x/collect_log_3x.sh 3x_tf"
|
||||
displayName: "Collect UT Coverage"
|
||||
|
||||
- task: PublishCodeCoverageResults@2
|
||||
inputs:
|
||||
summaryFileLocation: $(Build.SourcesDirectory)/log_dir/coverage_PR/coverage.xml
|
||||
|
||||
- task: PublishPipelineArtifact@1
|
||||
condition: succeededOrFailed()
|
||||
inputs:
|
||||
targetPath: $(UPLOAD_PATH)
|
||||
artifact: $(ARTIFACT_NAME)
|
||||
publishLocation: "pipeline"
|
||||
|
||||
- task: Bash@3
|
||||
condition: always()
|
||||
inputs:
|
||||
targetType: "inline"
|
||||
script: |
|
||||
docker exec collectLogs bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true"
|
||||
displayName: "Docker clean up"
|
@@ -0,0 +1,287 @@
|
||||
trigger: none
|
||||
|
||||
pr:
|
||||
autoCancel: true
|
||||
drafts: false
|
||||
branches:
|
||||
include:
|
||||
- master
|
||||
paths:
|
||||
include:
|
||||
- neural_compressor
|
||||
- test
|
||||
- setup.py
|
||||
- requirements.txt
|
||||
- .azure-pipelines/ut-basic.yml
|
||||
- .azure-pipelines/template/docker-template.yml
|
||||
- .azure-pipelines/scripts/ut
|
||||
- .azure-pipelines/scripts/fwk_version.sh
|
||||
- .azure-pipelines/scripts/install_nc.sh
|
||||
exclude:
|
||||
- test/3x
|
||||
- neural_compressor/common
|
||||
- neural_compressor/torch
|
||||
- neural_compressor/tensorflow
|
||||
- neural_compressor/onnxrt
|
||||
- neural_compressor/transformers
|
||||
- neural_compressor/evaluation
|
||||
- .azure-pipelines/scripts/ut/3x
|
||||
|
||||
pool: ICX-16C
|
||||
|
||||
variables:
|
||||
IMAGE_NAME: "neural-compressor"
|
||||
IMAGE_TAG: "py310"
|
||||
UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir
|
||||
DOWNLOAD_PATH: $(Build.SourcesDirectory)/log_dir
|
||||
ARTIFACT_NAME: "UT_coverage_report"
|
||||
REPO: $(Build.Repository.Uri)
|
||||
|
||||
stages:
|
||||
- stage: Adaptor
|
||||
displayName: Unit Test FWKs adaptor
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Test FWKs adaptor
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "commonDockerConfig"
|
||||
utScriptFileName: "run_basic_adaptor"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut_adaptor"
|
||||
|
||||
- stage: API
|
||||
displayName: Unit Test User facing API
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Test User facing API
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "commonDockerConfig"
|
||||
utScriptFileName: "run_basic_api"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut_api"
|
||||
|
||||
- stage: Pruning
|
||||
displayName: Unit Test Pruning
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Test PyTorch Pruning
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "commonDockerConfig"
|
||||
utScriptFileName: "run_basic_pt_pruning"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut_pt-pruning"
|
||||
- job:
|
||||
displayName: Test TensorFlow Pruning
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "commonDockerConfig"
|
||||
utScriptFileName: "run_basic_tf_pruning"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut_tf-pruning"
|
||||
|
||||
- stage: TFNewAPI
|
||||
displayName: Unit Test TF newAPI
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Test TF newAPI
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "commonDockerConfig"
|
||||
utScriptFileName: "run_basic_adaptor_tfnewapi"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut_tfnewapi"
|
||||
|
||||
- stage: ITEX
|
||||
displayName: Unit Test ITEX
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Test ITEX
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "commonDockerConfig"
|
||||
utScriptFileName: "run_basic_itex"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut_itex"
|
||||
|
||||
- stage: Others
|
||||
displayName: Unit Test other basic case
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Test other basic case
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "commonDockerConfig"
|
||||
utScriptFileName: "run_basic_others"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut_others"
|
||||
|
||||
- stage: Adaptor_base
|
||||
displayName: Unit Test FWKs adaptor baseline
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Test FWKs adaptor baseline
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "gitCloneDockerConfig"
|
||||
utScriptFileName: "run_basic_adaptor"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut-base_adaptor"
|
||||
repo: $(REPO)
|
||||
|
||||
- stage: API_base
|
||||
displayName: Unit Test User facing API baseline
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Test User facing API baseline
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "gitCloneDockerConfig"
|
||||
utScriptFileName: "run_basic_api"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut-base_api"
|
||||
repo: $(REPO)
|
||||
|
||||
- stage: Pruning_base
|
||||
displayName: Unit Test Pruning baseline
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Test PyTorch Pruning baseline
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "gitCloneDockerConfig"
|
||||
utScriptFileName: "run_basic_pt_pruning"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut-base_pt-pruning"
|
||||
repo: $(REPO)
|
||||
- job:
|
||||
displayName: Test TensorFlow Pruning baseline
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "gitCloneDockerConfig"
|
||||
utScriptFileName: "run_basic_tf_pruning"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut-base_tf-pruning"
|
||||
repo: $(REPO)
|
||||
|
||||
- stage: TFNewAPI_base
|
||||
displayName: Unit Test TF newAPI baseline
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Test TF newAPI baseline
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "gitCloneDockerConfig"
|
||||
utScriptFileName: "run_basic_adaptor_tfnewapi"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut-base_tfnewapi"
|
||||
repo: $(REPO)
|
||||
|
||||
- stage: ITEX_base
|
||||
displayName: Unit Test ITEX baseline
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Test ITEX baseline
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "gitCloneDockerConfig"
|
||||
utScriptFileName: "run_basic_itex"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut-base_itex"
|
||||
repo: $(REPO)
|
||||
|
||||
- stage: Others_base
|
||||
displayName: Unit Test other cases baseline
|
||||
dependsOn: []
|
||||
jobs:
|
||||
- job:
|
||||
displayName: Test other cases baseline
|
||||
steps:
|
||||
- template: template/ut-template.yml
|
||||
parameters:
|
||||
dockerConfigName: "gitCloneDockerConfig"
|
||||
utScriptFileName: "run_basic_others"
|
||||
uploadPath: $(UPLOAD_PATH)
|
||||
utArtifact: "ut-base_others"
|
||||
repo: $(REPO)
|
||||
|
||||
- stage: Coverage
|
||||
displayName: "Coverage Compare"
|
||||
pool:
|
||||
vmImage: "ubuntu-latest"
|
||||
dependsOn: [Adaptor, API, Pruning, TFNewAPI, ITEX, Others, Adaptor_base, API_base, Pruning_base, TFNewAPI_base, ITEX_base, Others_base]
|
||||
jobs:
|
||||
- job: CollectDatafiles
|
||||
steps:
|
||||
- script: |
|
||||
if [[ ! $(docker images | grep -i ${IMAGE_NAME}:${IMAGE_TAG}) ]]; then
|
||||
docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/Dockerfile.devel -t ${IMAGE_NAME}:${IMAGE_TAG} .
|
||||
fi
|
||||
docker images | grep -i ${IMAGE_NAME}
|
||||
if [[ $? -ne 0 ]]; then
|
||||
echo "NO Such Repo"
|
||||
exit 1
|
||||
fi
|
||||
displayName: "Build develop docker image"
|
||||
|
||||
- task: DownloadPipelineArtifact@2
|
||||
inputs:
|
||||
artifact:
|
||||
patterns: '*_coverage/.coverage.*'
|
||||
path: $(DOWNLOAD_PATH)
|
||||
|
||||
- script: |
|
||||
echo "--- create container ---"
|
||||
docker run -d -it --name="collectLogs" -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor ${IMAGE_NAME}:${IMAGE_TAG} /bin/bash
|
||||
echo "--- docker ps ---"
|
||||
docker ps
|
||||
echo "--- collect logs ---"
|
||||
docker exec collectLogs /bin/bash +x -c "cd /neural-compressor/.azure-pipelines/scripts \
|
||||
&& bash install_nc.sh \
|
||||
&& bash ut/collect_log.sh"
|
||||
displayName: "Collect UT Coverage"
|
||||
|
||||
- task: PublishCodeCoverageResults@2
|
||||
inputs:
|
||||
summaryFileLocation: $(Build.SourcesDirectory)/log_dir/coverage_PR/coverage.xml
|
||||
|
||||
- task: PublishPipelineArtifact@1
|
||||
condition: succeededOrFailed()
|
||||
inputs:
|
||||
targetPath: $(UPLOAD_PATH)
|
||||
artifact: $(ARTIFACT_NAME)
|
||||
publishLocation: "pipeline"
|
||||
|
||||
- task: Bash@3
|
||||
condition: always()
|
||||
inputs:
|
||||
targetType: "inline"
|
||||
script: |
|
||||
docker exec collectLogs bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true"
|
||||
displayName: "Docker clean up"
|
107  uukssw/quote1/_ref/neural-compressor/.github/checkgroup.yml  vendored  Normal file
@@ -0,0 +1,107 @@
|
||||
custom_service_name: "CI checker"
|
||||
subprojects:
|
||||
- id: "Code Scan Tests workflow"
|
||||
paths:
|
||||
- "neural_compressor/**"
|
||||
- "setup.py"
|
||||
- "requirements.txt"
|
||||
- ".azure-pipelines/code-scan.yml"
|
||||
- ".azure-pipelines/scripts/codeScan/**"
|
||||
checks:
|
||||
- "Code-Scan"
|
||||
- "Code-Scan (Bandit Code Scan Bandit)"
|
||||
- "Code-Scan (DocStyle Code Scan DocStyle)"
|
||||
|
||||
- id: "Model Tests workflow"
|
||||
paths:
|
||||
- "neural_compressor/**"
|
||||
- "setup.py"
|
||||
- "requirements.txt"
|
||||
- ".azure-pipelines/scripts/models/**"
|
||||
- "examples/tensorflow/oob_models/quantization/ptq/**"
|
||||
- "!test"
|
||||
- "!neural_compressor/common/**"
|
||||
- "!neural_compressor/torch/**"
|
||||
- "!neural_compressor/tensorflow/**"
|
||||
- "!neural_compressor/onnxrt/**"
|
||||
checks:
|
||||
- "Model-Test"
|
||||
- "Model-Test (Generate Report GenerateReport)"
|
||||
- "Model-Test (Run ONNX Model resnet50-v1-12)"
|
||||
- "Model-Test (Run PyTorch Model resnet18_fx)"
|
||||
- "Model-Test (Run TensorFlow Model resnet50v1.5)"
|
||||
- "Model-Test (Run TensorFlow Model ssd_resnet50_v1)"
|
||||
|
||||
- id: "Model Tests 3x workflow"
|
||||
paths:
|
||||
- "neural_compressor/common/**"
|
||||
- "neural_compressor/torch/**"
|
||||
- "examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/**"
|
||||
- "setup.py"
|
||||
- "requirements_pt.txt"
|
||||
- ".azure-pipelines/scripts/models/**"
|
||||
checks:
|
||||
- "Model-Test-3x"
|
||||
- "Model-Test-3x (Generate Report GenerateReport)"
|
||||
- "Model-Test-3x (Run PyTorch Model opt_125m_woq_gptq_int4)"
|
||||
- "Model-Test-3x (Run PyTorch Model opt_125m_woq_gptq_nf4_dq_bnb)"
|
||||
- "Model-Test-3x (Run PyTorch Model opt_125m_woq_gptq_int4_dq_ggml)"
|
||||
|
||||
- id: "Unit Tests basic workflow"
|
||||
paths:
|
||||
- "neural_compressor/**"
|
||||
- "test/**"
|
||||
- "setup.py"
|
||||
- "requirements.txt"
|
||||
- ".azure-pipelines/scripts/ut/**"
|
||||
- "!test/3x/**"
|
||||
- "!neural_compressor/common/**"
|
||||
- "!neural_compressor/torch/**"
|
||||
- "!neural_compressor/tensorflow/**"
|
||||
- "!neural_compressor/onnxrt/**"
|
||||
- "!.azure-pipelines/scripts/ut/3x/**"
|
||||
checks:
|
||||
- "UT-Basic"
|
||||
- "UT-Basic (Coverage Compare CollectDatafiles)"
|
||||
- "UT-Basic (Unit Test FWKs adaptor Test FWKs adaptor)"
|
||||
- "UT-Basic (Unit Test FWKs adaptor baseline Test FWKs adaptor baseline)"
|
||||
- "UT-Basic (Unit Test ITEX Test ITEX)"
|
||||
- "UT-Basic (Unit Test ITEX baseline Test ITEX baseline)"
|
||||
- "UT-Basic (Unit Test Pruning Test PyTorch Pruning)"
|
||||
- "UT-Basic (Unit Test Pruning Test TensorFlow Pruning)"
|
||||
- "UT-Basic (Unit Test Pruning baseline Test PyTorch Pruning baseline)"
|
||||
- "UT-Basic (Unit Test Pruning baseline Test TensorFlow Pruning baseline)"
|
||||
- "UT-Basic (Unit Test TF newAPI Test TF newAPI)"
|
||||
- "UT-Basic (Unit Test TF newAPI baseline Test TF newAPI baseline)"
|
||||
- "UT-Basic (Unit Test User facing API Test User facing API)"
|
||||
- "UT-Basic (Unit Test User facing API baseline Test User facing API baseline)"
|
||||
- "UT-Basic (Unit Test other basic case Test other basic case)"
|
||||
- "UT-Basic (Unit Test other cases baseline Test other cases baseline)"
|
||||
|
||||
- id: "Unit Tests 3x-TensorFlow workflow"
|
||||
paths:
|
||||
- "neural_compressor/common/**"
|
||||
- "neural_compressor/tensorflow/**"
|
||||
- "test/3x/tensorflow/**"
|
||||
- "setup.py"
|
||||
- "requirements_tf.txt"
|
||||
checks:
|
||||
- "UT-3x-TensorFlow"
|
||||
- "UT-3x-TensorFlow (Coverage Compare CollectDatafiles)"
|
||||
- "UT-3x-TensorFlow (Unit Test 3x TensorFlow Unit Test 3x TensorFlow)"
|
||||
- "UT-3x-TensorFlow (Unit Test 3x TensorFlow baseline Unit Test 3x TensorFlow baseline)"
|
||||
|
||||
- id: "Unit Tests 3x-PyTorch workflow"
|
||||
paths:
|
||||
- "neural_compressor/common/**"
|
||||
- "neural_compressor/torch/**"
|
||||
- "test/3x/torch/**"
|
||||
- "test/3x/common/**"
|
||||
- "setup.py"
|
||||
- "requirements_pt.txt"
|
||||
- ".azure-pipelines/scripts/ut/3x/collect_log_3x.sh"
|
||||
checks:
|
||||
- "UT-3x-Torch"
|
||||
- "UT-3x-Torch (Coverage Compare CollectDatafiles)"
|
||||
- "UT-3x-Torch (Unit Test 3x Torch Unit Test 3x Torch)"
|
||||
- "UT-3x-Torch (Unit Test 3x Torch baseline Unit Test 3x Torch baseline)"
|
13  uukssw/quote1/_ref/neural-compressor/.github/license_template.txt  vendored  Normal file
@@ -0,0 +1,13 @@
Copyright (c) 2025 Intel Corporation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
20  uukssw/quote1/_ref/neural-compressor/.github/pull_request_template.md  vendored  Normal file
@@ -0,0 +1,20 @@
## Type of Change

feature or bug fix or documentation or validation or others
API changed or not

## Description

detail description

## Expected Behavior & Potential Risk

the expected behavior that triggered by this PR

## How has this PR been tested?

how to reproduce the test (including hardware information)

## Dependency Change?

any library dependency introduced or removed
18  uukssw/quote1/_ref/neural-compressor/.github/workflows/Scanner_Bdba.yml  vendored  Normal file
@@ -0,0 +1,18 @@
name: Scanner BDBA
permissions: read-all

on:
  workflow_dispatch:

jobs:
  bdba_job:
    name: BDBA Scan
    uses: intel-innersource/frameworks.ai.infrastructure.code-scan-tools/.github/workflows/Scanner_Bdba.yml@one-ci-cd
    with:
      repos: ${{ github.event.repository.name }}
      refs: ${{ github.ref_name }}
      group: "22"
      runners: "['self-hosted']"
    secrets:
      token: ${{ secrets.GITHUB_TOKEN }}
      BDBA_TOKEN: ${{ secrets.BDBA_TOKEN }}
20  uukssw/quote1/_ref/neural-compressor/.github/workflows/Scanner_Coverity.yml  vendored  Normal file
@@ -0,0 +1,20 @@
name: Scanner Coverity
permissions: read-all

on:
  workflow_dispatch:

jobs:
  coverity_job:
    uses: intel-innersource/frameworks.ai.infrastructure.code-scan-tools/.github/workflows/Scanner_Coverity.yml@one-ci-cd
    with:
      repos: ${{ github.event.repository.name }}
      refs: ${{ github.ref_name }}
      projectType: python
      url: 'https://coverityent.devtools.intel.com/prod1'
      stream: 'IntelNeuralCompressor-master'
      runners: "['self-hosted']"
    secrets:
      token: ${{ secrets.GITHUB_TOKEN }}
      USER: ${{ secrets.COVERITY_USER }}
      PASSWORD: ${{ secrets.COVERITY_PASSWORD }}
16  uukssw/quote1/_ref/neural-compressor/.github/workflows/Scanner_McAfee.yml  vendored  Normal file
@@ -0,0 +1,16 @@
name: Virus Scan
permissions: read-all

on:
  workflow_dispatch:

jobs:
  virus:
    name: McAfee Virus Scan
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
      - name: Execute Scan
        uses: intel-innersource/frameworks.devops.github.actions.mcafee@main
        with:
          scan_path: neural_compressor
27  uukssw/quote1/_ref/neural-compressor/.github/workflows/probot.yml  vendored  Normal file
@@ -0,0 +1,27 @@
name: Probot
permissions: read-all

on:
  pull_request:
    types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
  cancel-in-progress: true

jobs:
  required-jobs:
    runs-on: ubuntu-latest
    if: github.event.pull_request.draft == false
    timeout-minutes: 361 # in case something is wrong with the internal timeout
    steps:
      - uses: XuehaoSun/probot@0.2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          job: check-group
          interval: 180 # seconds
          timeout: 360 # minutes
          maintainers: "[XuehaoSun](https://github.com/XuehaoSun)"
          owner: "[chensuyue](https://github.com/chensuyue) or [XuehaoSun](https://github.com/XuehaoSun)"
28  uukssw/quote1/_ref/neural-compressor/.github/workflows/publish.yml  vendored  Normal file
@@ -0,0 +1,28 @@
name: Publish
permissions: {}

on:
  push:
    branches:
      - master

jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      pull-requests: write
      contents: write
    steps:
      - uses: actions/checkout@v3
      - name: Build Online Document
        run: |
          git config --local --get remote.origin.url
          cd docs/build_docs
          bash build.sh latest

      - name: Push to github
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./build_tmp/gh-pages
          publish_branch: gh-pages
24  uukssw/quote1/_ref/neural-compressor/.gitignore  vendored  Normal file
@@ -0,0 +1,24 @@
*.pyc
.vscode
.idea
/venv/
*/__pycache__
.ipynb_checkpoints/
*.snapshot
*.csv
*.pb
*.ckpt
*.log
*.swp
*.onnx
*.so
*.egg-info/
.eggs/
dist/
tags
build/
_build
lpot_workspace/
.torch/
node_modules
build_tmp
161  uukssw/quote1/_ref/neural-compressor/.pre-commit-config.yaml  Normal file
@@ -0,0 +1,161 @@
|
||||
ci:
|
||||
autofix_prs: true
|
||||
autoupdate_schedule: quarterly
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: end-of-file-fixer
|
||||
files: (.*\.(py|md|rst|yaml|yml))$
|
||||
exclude: |
|
||||
(?x)^(
|
||||
examples/.+|
|
||||
neural_compressor/torch/algorithms/fp8_quant/.+|
|
||||
test/3x/torch/algorithms/fp8_quant/.+
|
||||
)$
|
||||
- id: check-json
|
||||
exclude: |
|
||||
(?x)^(
|
||||
.vscode/settings_recommended.json
|
||||
)$
|
||||
- id: check-yaml
|
||||
exclude: |
|
||||
(?x)^(
|
||||
conda_meta/|
|
||||
neural_compressor/template/pruning.yaml|
|
||||
neural_compressor/adaptor/tensorflow_itex.yaml|
|
||||
neural_compressor/adaptor/tensorflow.yaml
|
||||
)$
|
||||
- id: debug-statements
|
||||
- id: file-contents-sorter
|
||||
exclude: |
|
||||
(?x)^(
|
||||
examples/.+
|
||||
)$
|
||||
args: [--unique]
|
||||
- id: requirements-txt-fixer
|
||||
exclude: |
|
||||
(?x)^(
|
||||
examples/.+
|
||||
)$
|
||||
- id: trailing-whitespace
|
||||
files: (.*\.(py|rst|cmake|yaml|yml))$
|
||||
exclude: |
|
||||
(?x)^(
|
||||
examples/.+|
|
||||
neural_compressor/torch/utils/.+|
|
||||
neural_compressor/torch/algorithms/fp8_quant/.+|
|
||||
test/3x/torch/quantization/.+
|
||||
)$
|
||||
|
||||
- repo: https://github.com/Lucas-C/pre-commit-hooks
|
||||
rev: v1.5.5
|
||||
hooks:
|
||||
- id: insert-license
|
||||
files: |
|
||||
(?x)^(
|
||||
neural_compressor/.*(py|yaml|yml|sh)
|
||||
)$
|
||||
args:
|
||||
[
|
||||
--license-filepath=.github/license_template.txt,
|
||||
--use-current-year,
|
||||
--detect-license-in-X-top-lines=40,
|
||||
--skip-license-insertion-comment=Copyright,
|
||||
]
|
||||
|
||||
- repo: https://github.com/asottile/yesqa
|
||||
rev: v1.5.0
|
||||
hooks:
|
||||
- id: yesqa
|
||||
name: Unused noqa
|
||||
|
||||
- repo: https://github.com/pycqa/isort
|
||||
rev: 5.13.2
|
||||
hooks:
|
||||
- id: isort
|
||||
exclude: |
|
||||
(?x)^(
|
||||
examples/.+|
|
||||
neural_compressor/torch/algorithms/fp8_quant/.+|
|
||||
test/3x/torch/.+
|
||||
)$
|
||||
|
||||
- repo: https://github.com/PyCQA/docformatter
|
||||
rev: 06907d0
|
||||
hooks:
|
||||
- id: docformatter
|
||||
args: [
|
||||
--in-place,
|
||||
--wrap-summaries=0, # 0 means disable wrap
|
||||
--wrap-descriptions=0, # 0 means disable wrap
|
||||
--black,
|
||||
--style=google,
|
||||
]
|
||||
exclude: |
|
||||
(?x)^(
|
||||
examples/.+|
|
||||
neural_compressor/torch/algorithms/fp8_quant/.+|
|
||||
test/3x/torch/.+
|
||||
)$
|
||||
|
||||
- repo: https://github.com/psf/black.git
|
||||
rev: 24.10.0
|
||||
hooks:
|
||||
- id: black
|
||||
files: (.*\.py)$
|
||||
exclude: |
|
||||
(?x)^(
|
||||
neural_compressor/conf/config.py|
|
||||
neural_compressor/conf/pythonic_config.py|
|
||||
examples/.+|
|
||||
neural_compressor/torch/algorithms/fp8_quant/.+|
|
||||
test/3x/torch/.+
|
||||
)$
|
||||
|
||||
- repo: https://github.com/asottile/blacken-docs
|
||||
rev: 1.19.1
|
||||
hooks:
|
||||
- id: blacken-docs
|
||||
args: [--line-length=120, --skip-errors]
|
||||
additional_dependencies:
|
||||
- black==24.10.0
|
||||
exclude: |
|
||||
(?x)^(
|
||||
examples/.+|
|
||||
docs/source-app|
|
||||
neural_compressor/torch/algorithms/fp8_quant/.+|
|
||||
test/3x/torch/.+
|
||||
)$
|
||||
|
||||
- repo: https://github.com/codespell-project/codespell
|
||||
rev: v2.3.0
|
||||
hooks:
|
||||
- id: codespell
|
||||
args: [-w]
|
||||
additional_dependencies:
|
||||
- tomli
|
||||
exclude: |
|
||||
(?x)^(
|
||||
examples/.*(txt|patch)|
|
||||
examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json|
|
||||
examples/notebook/dynas/ResNet50_Quantiation_Search_Supernet_NAS.ipynb|
|
||||
examples/notebook/dynas/Transformer_LT_Supernet_NAS.ipynb|
|
||||
neural_compressor/torch/algorithms/fp8_quant/internal/diffusion_evaluation/SR_evaluation/imagenet1000_clsidx_to_labels.txt|
|
||||
neural_compressor/evaluation/hf_eval/datasets/cnn_validation.json|
|
||||
neural_compressor/torch/algorithms/fp8_quant/.+|
|
||||
test/3x/torch/.+
|
||||
)$
|
||||
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.8.6
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [--fix, --exit-non-zero-on-fix, --no-cache]
|
||||
exclude: |
|
||||
(?x)^(
|
||||
examples/.+|
|
||||
neural_compressor/torch/algorithms/fp8_quant/.+|
|
||||
test/3x/torch/.+
|
||||
)$
|
201  uukssw/quote1/_ref/neural-compressor/LICENSE  Normal file
@@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
============================================================================
|
||||
|
||||
Copyright 2016-2019 Intel Corporation
|
||||
Copyright 2018 YANDEX LLC
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
This distribution includes third party software ("third party programs").
|
||||
This third party software, even if included with the distribution of
|
||||
the Intel software, may be governed by separate license terms, including
|
||||
without limitation, third party license terms, other Intel software license
|
||||
terms, and open source software license terms. These separate license terms
|
||||
govern your use of the third party programs as set forth in the
|
||||
"THIRD-PARTY-PROGRAMS" file.
|
199  uukssw/quote1/_ref/neural-compressor/README.md  Normal file
@@ -0,0 +1,199 @@
|
||||
<div align="center">
|
||||
|
||||
Intel® Neural Compressor
|
||||
===========================
|
||||
<h3> An open-source Python library supporting popular model compression techniques on all mainstream deep learning frameworks (TensorFlow, PyTorch, and ONNX Runtime)</h3>
|
||||
|
||||
[](https://github.com/intel/neural-compressor)
|
||||
[](https://github.com/intel/neural-compressor/releases)
|
||||
[](https://github.com/intel/neural-compressor/blob/master/LICENSE)
|
||||
[](https://github.com/intel/neural-compressor)
|
||||
[](https://pepy.tech/project/neural-compressor)
|
||||
|
||||
[Architecture](./docs/source/3x/design.md#architecture) | [Workflow](./docs/source/3x/design.md#workflows) | [LLMs Recipes](./docs/source/llm_recipes.md) | [Results](./docs/source/validated_model_list.md) | [Documentations](https://intel.github.io/neural-compressor)
|
||||
|
||||
---
|
||||
<div align="left">
|
||||
|
||||
Intel® Neural Compressor aims to provide popular model compression techniques such as quantization, pruning (sparsity), distillation, and neural architecture search on mainstream frameworks such as [TensorFlow](https://www.tensorflow.org/), [PyTorch](https://pytorch.org/), and [ONNX Runtime](https://onnxruntime.ai/),
|
||||
as well as Intel extensions such as [Intel Extension for TensorFlow](https://github.com/intel/intel-extension-for-tensorflow) and [Intel Extension for PyTorch](https://github.com/intel/intel-extension-for-pytorch).
|
||||
In particular, the tool provides the key features, typical examples, and open collaborations as below:
|
||||
|
||||
* Support a wide range of Intel hardware such as [Intel Gaudi Al Accelerators](https://www.intel.com/content/www/us/en/products/details/processors/ai-accelerators/gaudi-overview.html), [Intel Core Ultra Processors](https://www.intel.com/content/www/us/en/products/details/processors/core-ultra.html), [Intel Xeon Scalable Processors](https://www.intel.com/content/www/us/en/products/details/processors/xeon/scalable.html), [Intel Xeon CPU Max Series](https://www.intel.com/content/www/us/en/products/details/processors/xeon/max-series.html), [Intel Data Center GPU Flex Series](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/data-center-gpu/flex-series.html), and [Intel Data Center GPU Max Series](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/data-center-gpu/max-series.html) with extensive testing;
|
||||
support AMD CPU, ARM CPU, and NVIDIA GPU through ONNX Runtime with limited testing; support NVIDIA GPU for some WOQ algorithms like AutoRound and HQQ.
|
||||
|
||||
* Validate popular LLMs such as [LLama2](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [Falcon](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [GPT-J](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [Bloom](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [OPT](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), and more than 10,000 broad models such as [Stable Diffusion](/examples/pytorch/nlp/huggingface_models/text-to-image/quantization), [BERT-Large](/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx), and [ResNet50](/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx) from popular model hubs such as [Hugging Face](https://huggingface.co/), [Torch Vision](https://pytorch.org/vision/stable/index.html), and [ONNX Model Zoo](https://github.com/onnx/models#models), with automatic [accuracy-driven](/docs/source/design.md#workflow) quantization strategies
|
||||
|
||||
* Collaborate with cloud marketplaces such as [Google Cloud Platform](https://console.cloud.google.com/marketplace/product/bitnami-launchpad/inc-tensorflow-intel?project=verdant-sensor-286207), [Amazon Web Services](https://aws.amazon.com/marketplace/pp/prodview-yjyh2xmggbmga#pdp-support), and [Azure](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/bitnami.inc-tensorflow-intel), software platforms such as [Alibaba Cloud](https://www.intel.com/content/www/us/en/developer/articles/technical/quantize-ai-by-oneapi-analytics-on-alibaba-cloud.html), [Tencent TACO](https://new.qq.com/rain/a/20221202A00B9S00) and [Microsoft Olive](https://github.com/microsoft/Olive), and open AI ecosystem such as [Hugging Face](https://huggingface.co/blog/intel), [PyTorch](https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html), [ONNX](https://github.com/onnx/models#models), [ONNX Runtime](https://github.com/microsoft/onnxruntime), and [Lightning AI](https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/advanced/post_training_quantization.rst)
|
||||
|
||||
## What's New
|
||||
* [2024/10] [Transformers-like API](./docs/source/3x/transformers_like_api.md) for INT4 inference on Intel CPU and GPU.
|
||||
* [2024/07] Starting with the 3.0 release, the framework extension API is recommended for quantization.
|
||||
* [2024/07] Performance optimizations and usability improvements on [client-side](./docs/source/3x/client_quant.md).
|
||||
|
||||
## Installation
|
||||
Choose the necessary framework dependencies to install based on your deployment environment.
|
||||
### Install Framework
|
||||
* [Install intel_extension_for_pytorch for CPU](https://intel.github.io/intel-extension-for-pytorch/cpu/latest/)
|
||||
* [Install intel_extension_for_pytorch for XPU](https://intel.github.io/intel-extension-for-pytorch/xpu/latest/)
|
||||
* [Use Docker Image with torch installed for HPU](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#bare-metal-fresh-os-single-click)
|
||||
**Note**: There is a version mapping between Intel Neural Compressor and the Gaudi Software Stack; please refer to this [table](./docs/source/3x/gaudi_version_map.md) and make sure to use a matched combination.
|
||||
* [Install torch for other platform](https://pytorch.org/get-started/locally)
|
||||
* [Install TensorFlow](https://www.tensorflow.org/install)
|
||||
|
||||
### Install Neural Compressor from pypi
|
||||
```
# Install 2.X API + Framework extension API + PyTorch dependency
pip install neural-compressor[pt]
# Install 2.X API + Framework extension API + TensorFlow dependency
pip install neural-compressor[tf]
```
|
||||
**Note**: Further installation methods can be found in the [Installation Guide](./docs/source/installation_guide.md). Check out our [FAQ](./docs/source/faq.md) for more details.
|
||||
|
||||
## Getting Started
|
||||
After successfully installing these packages, try your first quantization program. **The following example code demonstrates FP8 quantization**, which is supported by the Intel Gaudi2 AI Accelerator.
|
||||
To try it on Intel Gaudi2, a Docker image with the Gaudi Software Stack is recommended; please refer to the following script for environment setup. More details can be found in the [Gaudi Guide](https://docs.habana.ai/en/latest/Installation_Guide/Bare_Metal_Fresh_OS.html#launch-docker-image-that-was-built).
|
||||
|
||||
Run a container with an interactive shell,
|
||||
```
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.19.0/ubuntu24.04/habanalabs/pytorch-installer-2.5.1:latest
```
|
||||
Run the example,
|
||||
```python
from neural_compressor.torch.quantization import (
    FP8Config,
    prepare,
    convert,
)

import torch
import torchvision.models as models

model = models.resnet18()
qconfig = FP8Config(fp8_config="E4M3")
model = prepare(model, qconfig)

# Customer defined calibration. Below is a dummy calibration
model(torch.randn(1, 3, 224, 224).to("hpu"))

model = convert(model)

output = model(torch.randn(1, 3, 224, 224).to("hpu")).to("cpu")
print(output.shape)
```
|
||||
See the [FP8 quantization doc](./docs/source/3x/PT_FP8Quant.md) for more details.
|
||||
|
||||
**The following example code demonstrates weight-only large language model loading** on the Intel Gaudi2 AI Accelerator.
|
||||
```python
import torch

from neural_compressor.torch.quantization import load

model_name = "TheBloke/Llama-2-7B-GPTQ"
model = load(
    model_name_or_path=model_name,
    format="huggingface",
    device="hpu",
    torch_dtype=torch.bfloat16,
)
```
|
||||
**Note:** On the first load, Intel Neural Compressor converts the model from the auto-gptq format to the HPU format and saves hpu_model.safetensors to the local cache directory for subsequent loads, so the first load may take a while.

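As a quick, hedged illustration of the caching behavior described above (not part of the upstream README), the sketch below simply times two consecutive calls to the `load` API from the previous example. The model name is the same assumed example checkpoint; the second call is expected to be much faster once hpu_model.safetensors has been cached locally.

```python
import time

import torch
from neural_compressor.torch.quantization import load

model_name = "TheBloke/Llama-2-7B-GPTQ"  # same example checkpoint as above (assumption)

for attempt in (1, 2):
    start = time.perf_counter()
    model = load(
        model_name_or_path=model_name,
        format="huggingface",
        device="hpu",
        torch_dtype=torch.bfloat16,
    )
    # The first pass converts auto-gptq weights to the HPU format; the second
    # pass should reuse the cached hpu_model.safetensors and finish much faster.
    print(f"load attempt {attempt} took {time.perf_counter() - start:.1f}s")
```
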
## Documentation
|
||||
|
||||
<table class="docutils">
|
||||
<thead>
|
||||
<tr>
|
||||
<th colspan="8">Overview</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td colspan="2" align="center"><a href="./docs/source/3x/design.md#architecture">Architecture</a></td>
|
||||
<td colspan="2" align="center"><a href="./docs/source/3x/design.md#workflows">Workflow</a></td>
|
||||
<td colspan="2" align="center"><a href="https://intel.github.io/neural-compressor/latest/docs/source/api-doc/apis.html">APIs</a></td>
|
||||
<td colspan="1" align="center"><a href="./docs/source/3x/llm_recipes.md">LLMs Recipes</a></td>
|
||||
<td colspan="1" align="center"><a href="./examples/3.x_api/README.md">Examples</a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<thead>
|
||||
<tr>
|
||||
<th colspan="8">PyTorch Extension APIs</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td colspan="2" align="center"><a href="./docs/source/3x/PyTorch.md">Overview</a></td>
|
||||
<td colspan="2" align="center"><a href="./docs/source/3x/PT_DynamicQuant.md">Dynamic Quantization</a></td>
|
||||
<td colspan="2" align="center"><a href="./docs/source/3x/PT_StaticQuant.md">Static Quantization</a></td>
|
||||
<td colspan="2" align="center"><a href="./docs/source/3x/PT_SmoothQuant.md">Smooth Quantization</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2" align="center"><a href="./docs/source/3x/PT_WeightOnlyQuant.md">Weight-Only Quantization</a></td>
|
||||
<td colspan="2" align="center"><a href="./docs/source/3x/PT_FP8Quant.md">FP8 Quantization</a></td>
|
||||
<td colspan="2" align="center"><a href="./docs/source/3x/PT_MXQuant.md">MX Quantization</a></td>
|
||||
<td colspan="2" align="center"><a href="./docs/source/3x/PT_MixedPrecision.md">Mixed Precision</a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<thead>
|
||||
<tr>
|
||||
<th colspan="8">Tensorflow Extension APIs</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td colspan="3" align="center"><a href="./docs/source/3x/TensorFlow.md">Overview</a></td>
|
||||
<td colspan="3" align="center"><a href="./docs/source/3x/TF_Quant.md">Static Quantization</a></td>
|
||||
<td colspan="2" align="center"><a href="./docs/source/3x/TF_SQ.md">Smooth Quantization</a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<thead>
|
||||
<tr>
|
||||
<th colspan="8">Transformers-like APIs</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td colspan="8" align="center"><a href="./docs/source/3x/transformers_like_api.md">Overview</a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<thead>
|
||||
<tr>
|
||||
<th colspan="8">Other Modules</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td colspan="4" align="center"><a href="./docs/source/3x/autotune.md">Auto Tune</a></td>
|
||||
<td colspan="4" align="center"><a href="./docs/source/3x/benchmark.md">Benchmark</a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
> **Note**:
|
||||
> Starting from the 3.0 release, we recommend using the 3.X API. Training-time compression techniques such as QAT, pruning, and distillation are currently only available in the [2.X API](https://github.com/intel/neural-compressor/blob/master/docs/source/2x_user_guide.md).
|
||||
|
||||
## Selected Publications/Events
|
||||
|
||||
* EMNLP'2024: [Optimize Weight Rounding via Signed Gradient Descent for the Quantization of LLMs](https://arxiv.org/abs/2309.05516) (Sep 2024)
|
||||
* Blog on Medium: [Quantization on Intel Gaudi Series AI Accelerators](https://medium.com/intel-analytics-software/intel-neural-compressor-v3-0-a-quantization-tool-across-intel-hardware-9856adee6f11) (Aug 2024)
|
||||
* Blog by Intel: [Neural Compressor: Boosting AI Model Efficiency](https://community.intel.com/t5/Blogs/Tech-Innovation/Artificial-Intelligence-AI/Neural-Compressor-Boosting-AI-Model-Efficiency/post/1604740) (June 2024)
|
||||
* Blog by Intel: [Optimization of Intel AI Solutions for Alibaba Cloud’s Qwen2 Large Language Models](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-ai-solutions-accelerate-alibaba-qwen2-llms.html) (June 2024)
|
||||
* Blog by Intel: [Accelerate Meta* Llama 3 with Intel AI Solutions](https://www.intel.com/content/www/us/en/developer/articles/technical/accelerate-meta-llama3-with-intel-ai-solutions.html) (Apr 2024)
|
||||
* EMNLP'2023 (Under Review): [TEQ: Trainable Equivalent Transformation for Quantization of LLMs](https://openreview.net/forum?id=iaI8xEINAf&referrer=%5BAuthor%20Console%5D) (Sep 2023)
|
||||
* arXiv: [Efficient Post-training Quantization with FP8 Formats](https://arxiv.org/abs/2309.14592) (Sep 2023)
|
||||
* arXiv: [Optimize Weight Rounding via Signed Gradient Descent for the Quantization of LLMs](https://arxiv.org/abs/2309.05516) (Sep 2023)
|
||||
|
||||
> **Note**:
|
||||
> View [Full Publication List](https://github.com/intel/neural-compressor/blob/master/docs/source/publication_list.md).
|
||||
|
||||
## Additional Content
|
||||
|
||||
* [Release Information](./docs/source/releases_info.md)
|
||||
* [Contribution Guidelines](./docs/source/CONTRIBUTING.md)
|
||||
* [Legal Information](./docs/source/legal_information.md)
|
||||
* [Security Policy](SECURITY.md)
|
||||
|
||||
## Communication
|
||||
- [GitHub Issues](https://github.com/intel/neural-compressor/issues): mainly for bug reports, new feature requests, questions, etc.
|
||||
- [Email](mailto:inc.maintainers@intel.com): welcome to share interesting research ideas on model compression techniques by email for potential collaborations.
|
||||
- [Discord Channel](https://discord.com/invite/Wxk3J3ZJkU): join the Discord channel for more flexible technical discussion.
|
||||
- [WeChat group](/docs/source/imgs/wechat_group.jpg): scan the QR code to join the technical discussion.
|
13
uukssw/quote1/_ref/neural-compressor/SECURITY.md
Normal file
@@ -0,0 +1,13 @@
|
||||
Security Policy
|
||||
===============
|
||||
|
||||
## Report a Vulnerability
|
||||
|
||||
Please report security issues or vulnerabilities to the [Intel® Security Center].
|
||||
|
||||
For more information on how Intel® works to resolve security issues, see
|
||||
[Vulnerability Handling Guidelines].
|
||||
|
||||
[Intel® Security Center]:https://www.intel.com/security
|
||||
|
||||
[Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html
|
47
uukssw/quote1/_ref/neural-compressor/docker/Dockerfile
Normal file
@@ -0,0 +1,47 @@
|
||||
#
|
||||
# Copyright (c) 2022 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
ARG UBUNTU_VER=22.04
|
||||
FROM ubuntu:${UBUNTU_VER} as deploy
|
||||
|
||||
# See http://bugs.python.org/issue19846
|
||||
ENV LANG C.UTF-8
|
||||
ARG PYTHON=python3.8
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
|
||||
${PYTHON}-dev \
|
||||
gcc \
|
||||
libgl1-mesa-glx \
|
||||
libglib2.0-0 \
|
||||
python3 \
|
||||
python3-pip \
|
||||
curl
|
||||
|
||||
RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \
|
||||
pip \
|
||||
setuptools
|
||||
|
||||
RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \
|
||||
ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \
|
||||
ln -sf $(which ${PYTHON}) /usr/bin/python && \
|
||||
ln -sf $(which ${PYTHON}) /usr/bin/python3
|
||||
|
||||
ARG INC_VER=2.3
|
||||
|
||||
RUN curl https://raw.githubusercontent.com/intel/neural-compressor/v${INC_VER}/third-party-programs.txt -o /licenses/third-party-programs.txt && \
|
||||
curl https://raw.githubusercontent.com/intel/neural-compressor/v${INC_VER}/docker/third-party-programs-docker.txt -o /licenses/third-party-programs-docker.txt && \
|
||||
curl https://raw.githubusercontent.com/intel/neural-compressor/v${INC_VER}/LICENSE -o /licenses/LICENSE
|
||||
|
||||
RUN python -m pip install --no-cache-dir neural-compressor${INC_VER:+==${INC_VER}}
|
65
uukssw/quote1/_ref/neural-compressor/docker/Dockerfile.devel
Normal file
@@ -0,0 +1,65 @@
|
||||
#
|
||||
# Copyright (c) 2022 Intel Corporation
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
ARG UBUNTU_VER=20.04
|
||||
FROM ubuntu:${UBUNTU_VER} as devel
|
||||
|
||||
# See http://bugs.python.org/issue19846
|
||||
ENV LANG C.UTF-8
|
||||
|
||||
ARG PYTHON=python3.8
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
|
||||
python3 \
|
||||
python3-pip
|
||||
|
||||
RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \
|
||||
pip \
|
||||
setuptools
|
||||
|
||||
RUN ln -sf $(which ${PYTHON}) /usr/local/bin/python && \
|
||||
ln -sf $(which ${PYTHON}) /usr/local/bin/python3 && \
|
||||
ln -sf $(which ${PYTHON}) /usr/bin/python && \
|
||||
ln -sf $(which ${PYTHON}) /usr/bin/python3
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
|
||||
${PYTHON}-dev \
|
||||
${PYTHON}-distutils \
|
||||
autoconf \
|
||||
build-essential \
|
||||
cmake \
|
||||
g++ \
|
||||
git \
|
||||
libgl1-mesa-glx \
|
||||
libglib2.0-0 \
|
||||
curl
|
||||
|
||||
ARG INC_BRANCH=v2.4rc1
|
||||
RUN git clone --single-branch --branch=${INC_BRANCH} https://github.com/intel/neural-compressor.git && \
|
||||
cd neural-compressor && \
|
||||
git submodule sync && \
|
||||
git submodule update --init --recursive && \
|
||||
python -m pip install --no-cache-dir pycocotools && \
|
||||
python -m pip install --no-cache-dir -r requirements.txt && \
|
||||
python setup.py install
|
||||
|
||||
WORKDIR /neural-compressor
|
||||
|
||||
ARG INC_VER=2.3
|
||||
|
||||
RUN curl https://raw.githubusercontent.com/intel/neural-compressor/v${INC_VER}/third-party-programs.txt -o /licenses/third-party-programs.txt && \
|
||||
curl https://raw.githubusercontent.com/intel/neural-compressor/v${INC_VER}/docker/third-party-programs-docker.txt -o /licenses/third-party-programs-docker.txt && \
|
||||
curl https://raw.githubusercontent.com/intel/neural-compressor/v${INC_VER}/LICENSE -o /licenses/LICENSE
|
34
uukssw/quote1/_ref/neural-compressor/docker/README.md
Normal file
@@ -0,0 +1,34 @@
|
||||
## Build Intel Neural Compressor Containers:
|
||||
|
||||
### To build the `Pip` based deployment container:
|
||||
Please note that `INC_VER` must be set to a valid version published here:
|
||||
https://pypi.org/project/neural-compressor/#history
|
||||
|
||||
```console
|
||||
$ PYTHON=python3.10
|
||||
$ INC_VER=3.2
|
||||
$ IMAGE_NAME=neural-compressor
|
||||
$ IMAGE_TAG=${INC_VER}
|
||||
$ docker build --build-arg PYTHON=${PYTHON} --build-arg INC_VER=${INC_VER} -f Dockerfile -t ${IMAGE_NAME}:${IMAGE_TAG} .
|
||||
```
|
||||
|
||||
### To build the `Pip` based development container:
|
||||
Please note that `INC_BRANCH` must be set to a valid branch name; otherwise, the Docker build fails.
|
||||
If `${INC_BRANCH}-devel` does not meet Docker tagging requirements described here:
|
||||
https://docs.docker.com/engine/reference/commandline/tag/
|
||||
then please modify the tag so that the tagging requirement is met, for example by replacing `/` with `-`.
|
||||
|
||||
```console
|
||||
$ PYTHON=python3.10
|
||||
$ INC_BRANCH=3.2
|
||||
$ IMAGE_NAME=neural-compressor
|
||||
$ IMAGE_TAG=${INC_BRANCH}-devel
|
||||
$ docker build --build-arg PYTHON=${PYTHON} --build-arg INC_BRANCH=${INC_BRANCH} -f Dockerfile.devel -t ${IMAGE_NAME}:${IMAGE_TAG} .
|
||||
```
|
||||
|
||||
### Check the Containers built:
|
||||
```console
|
||||
$ docker images | grep -i neural-compressor
|
||||
neural-compressor v3.2-devel 5c0dc1371312 5 minutes ago 2.76GB
|
||||
neural-compressor 3.2 303de7f7c38d 36 minutes ago 1.61GB
|
||||
```
|
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,20 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = source
|
||||
BUILDDIR = build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
185
uukssw/quote1/_ref/neural-compressor/docs/build_docs/build.sh
Executable file
@@ -0,0 +1,185 @@
|
||||
#!/bin/bash
|
||||
|
||||
help () {
|
||||
echo ""
|
||||
echo "Help:"
|
||||
echo "$0 or $0 local"
|
||||
echo " Build html for local test, not merge to gh-pages branch"
|
||||
echo "$0 version"
|
||||
echo " Build for version (version.py), then merge & push to gh-pages branch"
|
||||
echo "$0 latest"
|
||||
echo " Build for latest code, then merge & push to gh-pages branch"
|
||||
}
|
||||
|
||||
if [ ! -n "$1" ]; then
|
||||
ACT=only_build_local
|
||||
else
|
||||
if [ "$1" == "version" ]; then
|
||||
ACT=build_version
|
||||
elif [ "$1" == "latest" ]; then
|
||||
ACT=build_latest
|
||||
elif [ "$1" == "local" ]; then
|
||||
ACT=only_build_local
|
||||
elif [ "$1" == "help" ]; then
|
||||
help
|
||||
exit 0
|
||||
else
|
||||
echo "Wrong parameter \"$1\""
|
||||
help
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "ACT is ${ACT}"
|
||||
|
||||
if [ ${ACT} == "only_build_local" ]; then
|
||||
UPDATE_LATEST_FOLDER=1
|
||||
UPDATE_VERSION_FOLDER=1
|
||||
CHECKOUT_GH_PAGES=0
|
||||
elif [ ${ACT} == "build_version" ]; then
|
||||
UPDATE_LATEST_FOLDER=0
|
||||
UPDATE_VERSION_FOLDER=1
|
||||
CHECKOUT_GH_PAGES=1
|
||||
elif [ ${ACT} == "build_latest" ]; then
|
||||
UPDATE_LATEST_FOLDER=1
|
||||
UPDATE_VERSION_FOLDER=0
|
||||
CHECKOUT_GH_PAGES=1
|
||||
fi
|
||||
|
||||
WORK_DIR=../../build_tmp
|
||||
rm -rf /tmp/env_sphinx
|
||||
if [ ! -d ${WORK_DIR} ]; then
|
||||
echo "no ${WORK_DIR}"
|
||||
|
||||
else
|
||||
if [ ! -d ${WORK_DIR}/env_sphinx ]; then
|
||||
echo "no exist ${WORK_DIR}/env_sphinx"
|
||||
else
|
||||
cp -rf ${WORK_DIR}/env_sphinx /tmp/
|
||||
rm -rf ${WORK_DIR}
|
||||
echo "backup ${WORK_DIR}/env_sphinx to /tmp"
|
||||
fi
|
||||
fi
|
||||
|
||||
mkdir -p ${WORK_DIR}
|
||||
cp -rf ./* ${WORK_DIR}
|
||||
|
||||
cd ${WORK_DIR}
|
||||
|
||||
if [ ! -d /tmp/env_sphinx ]; then
|
||||
echo "no /tmp/env_sphinx"
|
||||
else
|
||||
echo "restore env_sphinx from /tmp"
|
||||
cp -r /tmp/env_sphinx ./
|
||||
fi
|
||||
|
||||
if [ ! -d env_sphinx ]; then
|
||||
echo "create env_sphinx"
|
||||
bash pip_set_env.sh
|
||||
fi
|
||||
|
||||
source env_sphinx/bin/activate
|
||||
|
||||
cp -rf ../docs/ ./source
|
||||
cp -f "../README.md" "./source/docs/source/Welcome.md"
|
||||
cp -f "../SECURITY.md" "./source/docs/source/SECURITY.md"
|
||||
|
||||
|
||||
all_md_files=`find ./source/docs -name "*.md"`
|
||||
for md_file in ${all_md_files}
|
||||
do
|
||||
sed -i 's/.md/.html/g' ${md_file}
|
||||
done
|
||||
|
||||
|
||||
sed -i 's/.\/docs\/source\/_static/./g' ./source/docs/source/Welcome.md
|
||||
sed -i 's/.md/.html/g; s/.\/docs\/source\//.\//g' ./source/docs/source/Welcome.md
|
||||
#sed -i 's/\/examples\/README.html/https:\/\/github.com\/intel\/neural-compressor\/blob\/master\/examples\/README.md/g' ./source/docs/source/user_guide.md
|
||||
sed -i 's/https\:\/\/intel.github.io\/neural-compressor\/lates.\/api-doc\/apis.html/https\:\/\/intel.github.io\/neural-compressor\/latest\/docs\/source\/api-doc\/apis.html/g' ./source/docs/source/Welcome.md
|
||||
sed -i 's/\/examples\/pytorch/https:\/\/github.com\/intel\/neural-compressor\/blob\/master\/examples\/pytorch/g' ./source/docs/source/Welcome.md
|
||||
|
||||
sed -i 's/examples\/README.html/https:\/\/github.com\/intel\/neural-compressor\/blob\/master\/examples\/README.md/g' ./source/docs/source/Welcome.md
|
||||
|
||||
sed -i 's/\/examples\/README.md/https:\/\/github.com\/intel\/neural-compressor\/blob\/master\/examples\/README.md/g' ./source/docs/source/get_started.md
|
||||
|
||||
sed -i 's/.\/validated_model_list.md\#/.\/validated_model_list.html\#/g' ./source/docs/source/installation_guide.md
|
||||
|
||||
make clean
|
||||
make html
|
||||
|
||||
if [[ $? -eq 0 ]]; then
|
||||
echo "Sphinx build online documents successfully!"
|
||||
else
|
||||
echo "Sphinx build online documents fault!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
DRAFT_FOLDER=./draft
|
||||
mkdir -p ${DRAFT_FOLDER}
|
||||
VERSION=`cat source/version.txt`
|
||||
DST_FOLDER=${DRAFT_FOLDER}/${VERSION}
|
||||
LATEST_FOLDER=${DRAFT_FOLDER}/latest
|
||||
SRC_FOLDER=build/html
|
||||
|
||||
RELEASE_FOLDER=./gh-pages
|
||||
ROOT_DST_FOLDER=${RELEASE_FOLDER}/${VERSION}
|
||||
ROOT_LATEST_FOLDER=${RELEASE_FOLDER}/latest
|
||||
|
||||
if [[ ${UPDATE_VERSION_FOLDER} -eq 1 ]]; then
|
||||
echo "create ${DST_FOLDER}"
|
||||
rm -rf ${DST_FOLDER}/*
|
||||
mkdir -p ${DST_FOLDER}
|
||||
cp -r ${SRC_FOLDER}/* ${DST_FOLDER}
|
||||
python update_html.py ${DST_FOLDER} ${VERSION}
|
||||
cp -r ./source/docs/source/imgs ${DST_FOLDER}/docs/source
|
||||
cp -r ./source/docs/source/3x/imgs ${DST_FOLDER}/docs/source/3x
|
||||
|
||||
|
||||
cp source/_static/index.html ${DST_FOLDER}
|
||||
else
|
||||
echo "skip to create ${DST_FOLDER}"
|
||||
fi
|
||||
|
||||
if [[ ${UPDATE_LATEST_FOLDER} -eq 1 ]]; then
|
||||
echo "create ${LATEST_FOLDER}"
|
||||
rm -rf ${LATEST_FOLDER}/*
|
||||
mkdir -p ${LATEST_FOLDER}
|
||||
cp -r ${SRC_FOLDER}/* ${LATEST_FOLDER}
|
||||
python update_html.py ${LATEST_FOLDER} ${VERSION}
|
||||
cp -r ./source/docs/source/imgs ${LATEST_FOLDER}/docs/source
|
||||
cp -r ./source/docs/source/3x/imgs ${LATEST_FOLDER}/docs/source/3x
|
||||
cp source/_static/index.html ${LATEST_FOLDER}
|
||||
else
|
||||
echo "skip to create ${LATEST_FOLDER}"
|
||||
fi
|
||||
|
||||
echo "Create document is done"
|
||||
|
||||
if [[ ${CHECKOUT_GH_PAGES} -eq 1 ]]; then
|
||||
git clone -b gh-pages --single-branch https://github.com/intel/neural-compressor.git ${RELEASE_FOLDER}
|
||||
|
||||
if [[ ${UPDATE_VERSION_FOLDER} -eq 1 ]]; then
|
||||
python update_version.py ${ROOT_DST_FOLDER} ${VERSION}
|
||||
cp -rf ${DST_FOLDER} ${RELEASE_FOLDER}
|
||||
fi
|
||||
|
||||
if [[ ${UPDATE_LATEST_FOLDER} -eq 1 ]]; then
|
||||
cp -rf ${LATEST_FOLDER} ${RELEASE_FOLDER}
|
||||
fi
|
||||
|
||||
else
|
||||
echo "skip pull gh-pages"
|
||||
fi
|
||||
|
||||
echo "UPDATE_LATEST_FOLDER=${UPDATE_LATEST_FOLDER}"
|
||||
echo "UPDATE_VERSION_FOLDER=${UPDATE_VERSION_FOLDER}"
|
||||
|
||||
|
||||
if [[ $? -eq 0 ]]; then
|
||||
echo "create online documents successfully!"
|
||||
else
|
||||
echo "create online documents fault!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
exit 0
|
@@ -0,0 +1,37 @@
|
||||
@ECHO OFF
|
||||
|
||||
pushd %~dp0
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
REM set SPHINXBUILD=sphinx-multiversion
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=source
|
||||
set BUILDDIR=build
|
||||
|
||||
%SPHINXBUILD% >NUL 2>NUL
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.https://www.sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
goto end
|
||||
|
||||
:help
|
||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
|
||||
:end
|
||||
popd
|
10
uukssw/quote1/_ref/neural-compressor/docs/build_docs/pip_set_env.sh
Executable file
@@ -0,0 +1,10 @@
|
||||
#!/bin/bash
|
||||
|
||||
ENV_NAME=env_sphinx
|
||||
deactivate
|
||||
rm -rf $ENV_NAME
|
||||
python -m venv $ENV_NAME
|
||||
source $ENV_NAME/bin/activate
|
||||
pip install --upgrade pip
|
||||
pip install -r sphinx-requirements.txt
|
||||
|
@@ -0,0 +1,20 @@
|
||||
/* make the page 1000px */
|
||||
.wy-nav-content {
|
||||
max-width: 1200px;
|
||||
}
|
||||
|
||||
/* code block highlight color in rtd changed to lime green, no no no */
|
||||
|
||||
.rst-content tt.literal, .rst-content code.literal, .highlight {
|
||||
background: #f0f0f0;
|
||||
}
|
||||
.rst-content tt.literal, .rst-content code.literal {
|
||||
color: #000000;
|
||||
}
|
||||
|
||||
div.version a:link {
|
||||
color: #ffffff;
|
||||
}
|
||||
div.version a:visited {
|
||||
color: #dddddd;
|
||||
}
|
@@ -0,0 +1 @@
|
||||
<meta http-equiv="refresh" content="0; URL='./docs/source/Welcome.html'" />
|
@@ -0,0 +1,3 @@
|
||||
{% extends '!footer.html' %} {% block extrafooter %} {{ super() }}
|
||||
<p></p><div><a href='https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html' data-cookie-notice='true'>Cookies</a> <a href='https://www.intel.com/content/www/us/en/privacy/intel-privacy-notice.html'>| Privacy</a></div>
|
||||
{% endblock %}
|
@@ -0,0 +1,16 @@
|
||||
{%- extends "!layout.html" %}
|
||||
{% block scripts %}
|
||||
<script type="text/javascript">
|
||||
// Configure TMS settings
|
||||
window.wapProfile = 'profile-microsite'; // This is mapped by WAP authorize value
|
||||
window.wapLocalCode = 'us-en'; // Dynamically set per localized site, see mapping table for values
|
||||
window.wapSection = "neural-compressor"; // WAP team will give you a unique section for your site
|
||||
window.wapEnv = 'prod'; // environment to be use in Adobe Tags.
|
||||
// Load TMS
|
||||
(() => {
|
||||
let url = 'https://www.intel.com/content/dam/www/global/wap/main/wap-microsite.js';
|
||||
let po = document.createElement('script'); po.type = 'text/javascript'; po.async = true; po.src = url;
|
||||
let s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(po, s);
|
||||
}) ();
|
||||
</script>
|
||||
{% endblock %}
|
@@ -0,0 +1,91 @@
|
||||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# For the full list of built-in configuration values, see the documentation:
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../../neural_compressor/"))
|
||||
import version as ver
|
||||
|
||||
version = ver.__version__
|
||||
release = version
|
||||
|
||||
with open("version.txt", "w") as f:
|
||||
f.write(version)
|
||||
|
||||
repo_url = "https://github.com/intel/neural-compressor/blob/v{}".format(version)
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
||||
|
||||
project = "Intel® Neural Compressor"
|
||||
copyright = "2022, Intel® Neural Compressor, Intel"
|
||||
author = "Intel® Neural Compressor developers"
|
||||
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||
|
||||
extensions = [
|
||||
"recommonmark",
|
||||
"sphinx_markdown_tables",
|
||||
"sphinx.ext.coverage",
|
||||
"sphinx.ext.autosummary",
|
||||
"sphinx_md",
|
||||
"sphinx_rtd_theme",
|
||||
"autoapi.extension",
|
||||
"sphinx.ext.napoleon",
|
||||
"sphinx.ext.githubpages",
|
||||
"sphinx.ext.linkcode",
|
||||
"sphinxcontrib.jquery",
|
||||
]
|
||||
|
||||
autoapi_dirs = ["../../neural_compressor"]
|
||||
autoapi_root = "autoapi"
|
||||
autoapi_keep_files = True
|
||||
autoapi_add_toctree_entry = False
|
||||
autosummary_generate = True
|
||||
autoapi_options = ["members", "show-module-summary"]
|
||||
autoapi_ignore = []
|
||||
|
||||
templates_path = ["_templates"]
|
||||
|
||||
source_suffix = [".rst", ".md"]
|
||||
|
||||
# The master toctree document.
|
||||
master_doc = "index"
|
||||
|
||||
exclude_patterns = []
|
||||
|
||||
pygments_style = "sphinx"
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
|
||||
|
||||
# html_theme = 'alabaster'
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
|
||||
html_static_path = ["_static"]
|
||||
templates_path = ["_templates"]
|
||||
|
||||
|
||||
def skip_util_classes(app, what, name, obj, skip, options):
|
||||
if what == "property" or what == "method":
|
||||
skip = True
|
||||
return skip
|
||||
|
||||
|
||||
def setup(app):
|
||||
app.add_css_file("custom.css")
|
||||
app.connect("autoapi-skip-member", skip_util_classes)
|
||||
|
||||
|
||||
def linkcode_resolve(domain, info):
|
||||
if domain != "py":
|
||||
return None
|
||||
if not info["module"]:
|
||||
return None
|
||||
filename = info["module"].replace(".", "/")
|
||||
return "{}/{}.py".format(repo_url, filename)
|
@@ -0,0 +1,22 @@
|
||||
|
||||
Intel® Neural Compressor Documentation
|
||||
######################################
|
||||
|
||||
Welcome to the project.
|
||||
|
||||
Sections
|
||||
********
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
docs/source/get_started.md
|
||||
docs/source/installation_guide.md
|
||||
docs/source/user_guide.md
|
||||
docs/source/examples_readme.md
|
||||
docs/source/api-doc/apis.rst
|
||||
docs/source/releases_info.md
|
||||
docs/source/contributions.md
|
||||
docs/source/legal_information.md
|
||||
docs/source/SECURITY.md
|
||||
Repo <https://github.com/intel/neural-compressor>
|
@@ -0,0 +1,10 @@
|
||||
recommonmark==0.7.1
|
||||
setuptools_scm[toml]==8.1.0
|
||||
sphinx==7.3.7
|
||||
sphinx-autoapi==3.1.0
|
||||
sphinx-autobuild==2024.4.16
|
||||
sphinx-markdown-tables==0.0.17
|
||||
sphinx-md==0.0.4
|
||||
sphinx_rtd_theme==2.0.0
|
||||
sphinxcontrib-jquery==4.1
|
||||
sphinxemoji==0.3.1
|
@@ -0,0 +1,100 @@
|
||||
import glob
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def find_index_path(index_file):
|
||||
with open(index_file, "r") as f:
|
||||
lines = f.readlines()
|
||||
for line in lines:
|
||||
pos = line.find('index.html" class="icon icon-home"')
|
||||
if pos < 0:
|
||||
continue
|
||||
pos1 = line.rfind('"', 0, pos)
|
||||
if pos1 < 0:
|
||||
return ""
|
||||
else:
|
||||
return "../" + line[pos1 + 1 : pos]
|
||||
return "ignore"
|
||||
|
||||
|
||||
def update_version_link(version, folder_name, index_file):
|
||||
index_buf = ""
|
||||
index_path = find_index_path(index_file)
|
||||
if index_path == "ignore":
|
||||
return
|
||||
|
||||
with open(index_file, "r") as f:
|
||||
index_buf = f.read()
|
||||
key_str = ' <div class="version">\n {}\n </div>'.format(version)
|
||||
version_list = """<div class="version">
|
||||
<a href="{}versions.html">{}▼</a>
|
||||
<p>Click link above to switch version</p>
|
||||
</div>""".format(
|
||||
index_path, folder_name
|
||||
)
|
||||
# print(index_buf.find(key_str))
|
||||
index_buf = index_buf.replace(key_str, version_list)
|
||||
# print(index_buf)
|
||||
|
||||
with open(index_file, "w") as f:
|
||||
f.write(index_buf)
|
||||
|
||||
|
||||
def update_source_url(version, folder_name, index_file):
|
||||
if "latest" != folder_name:
|
||||
return
|
||||
|
||||
base_url = 'class="reference external" href="https://github.com/intel/neural-compressor/blob/{}/'
|
||||
repo_url = base_url.format("v" + version)
|
||||
target = base_url.format("master")
|
||||
with open(index_file, "r") as f:
|
||||
index_buf = f.read()
|
||||
index_buf = index_buf.replace(repo_url, target)
|
||||
|
||||
with open(index_file, "w") as f:
|
||||
f.write(index_buf)
|
||||
|
||||
|
||||
def update_search(folder):
|
||||
search_file_name = "{}/search.html".format(folder)
|
||||
|
||||
with open(search_file_name, "r") as f:
|
||||
index_buf = f.read()
|
||||
key_str = '<script src="_static/searchtools.js"></script>'
|
||||
version_list = """<!--[if lt IE 9]>
|
||||
<script src="_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
<script src="_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="_static/documentation_options.js?v=fc837d61"></script>
|
||||
<script src="_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="_static/js/theme.js"></script>
|
||||
<script src="_static/searchtools.js"></script>"""
|
||||
index_buf = index_buf.replace(key_str, version_list)
|
||||
|
||||
with open(search_file_name, "w") as f:
|
||||
f.write(index_buf)
|
||||
|
||||
|
||||
def main(folder, version):
|
||||
folder_name = os.path.basename(folder)
|
||||
for index_file in glob.glob("{}/**/*.html".format(folder), recursive=True):
|
||||
update_version_link(version, folder_name, index_file)
|
||||
update_source_url(version, folder_name, index_file)
|
||||
update_search(folder)
|
||||
|
||||
|
||||
def help(me):
|
||||
print("python {} html_folder version".format(me))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) < 3:
|
||||
help(sys.argv[0])
|
||||
sys.exit(1)
|
||||
|
||||
folder = sys.argv[1]
|
||||
version = sys.argv[2]
|
||||
main(folder, version)
|
@@ -0,0 +1,37 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def main(folder, version):
|
||||
folder_name = os.path.basename(folder)
|
||||
|
||||
version_file = "{}/versions.html".format(os.path.dirname(folder))
|
||||
# print(version_file)
|
||||
ver_buf = ""
|
||||
with open(version_file, "r") as f:
|
||||
ver_buf = f.read()
|
||||
if ver_buf.find(version) >= 0:
|
||||
return
|
||||
key_str = '<li><a href="latest">latest</a></li>'
|
||||
new_ver = """<li><a href="latest">latest</a></li>
|
||||
<li><a href="{}">{}</a></li>""".format(
|
||||
version, version
|
||||
)
|
||||
ver_buf = ver_buf.replace(key_str, new_ver)
|
||||
|
||||
with open(version_file, "w") as f:
|
||||
f.write(ver_buf)
|
||||
|
||||
|
||||
def help(me):
|
||||
print("python {} html_folder version".format(me))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) < 3:
|
||||
help(sys.argv[0])
|
||||
sys.exit(1)
|
||||
|
||||
folder = sys.argv[1]
|
||||
version = sys.argv[2]
|
||||
main(folder, version)
|
@@ -0,0 +1,76 @@
|
||||
2.X API User Guide
|
||||
===========================
|
||||
|
||||
Intel® Neural Compressor aims to provide popular model compression techniques such as quantization, pruning (sparsity), distillation, and neural architecture search to help users optimize their models. The documents below introduce the concepts and modules in Intel® Neural Compressor and show how to use its APIs to conduct quantization, pruning (sparsity), distillation, and neural architecture search on mainstream frameworks.
|
||||
|
||||
## Overview
|
||||
This part gives you a quick understanding of the design structure and workflow of 2.X Intel® Neural Compressor. We provide broad examples to help users get started.
|
||||
<table class="docutils">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td colspan="4" align="center"><a href="design.md#architecture">Architecture</a></td>
|
||||
<td colspan="3" align="center"><a href="design.md#workflow">Workflow</a></td>
|
||||
<td colspan="2" align="center"><a href="https://intel.github.io/neural-compressor/latest/docs/source/api-doc/apis.html">APIs</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2" align="center"><a href="/examples/README.md#notebook-examples">Notebook</a></td>
|
||||
<td colspan="1" align="center"><a href="/examples/README.md">Examples</a></td>
|
||||
<td colspan="1" align="center"><a href="validated_model_list.md">Results</a></td>
|
||||
<td colspan="5" align="center"><a href="https://software.intel.com/content/www/us/en/develop/documentation/get-started-with-ai-linux/top.html">Intel oneAPI AI Analytics Toolkit</a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
## Python-based APIs
|
||||
The Python-based APIs section covers the functional APIs in Intel® Neural Compressor,
|
||||
introducing the mechanism of each function and providing a tutorial to help users apply them to their own cases.
|
||||
Please note that support for the Intel Neural Compressor 1.X API will end in the future.
|
||||
We therefore provide a comprehensive Code Migration document to help users update their code from the previous 1.X version to the new 2.X version.
|
||||
In the 2.X API, it is important to create a `DataLoader` and `Metric` for your examples, so we provide detailed introductions (see the sketch after the table below).
|
||||
|
||||
<table class="docutils">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td colspan="2" align="center"><a href="quantization.md">Quantization</a></td>
|
||||
<td colspan="3" align="center"><a href="mixed_precision.md">Advanced Mixed Precision</a></td>
|
||||
<td colspan="2" align="center"><a href="pruning.md">Pruning (Sparsity)</a></td>
|
||||
<td colspan="2" align="center"><a href="distillation.md">Distillation</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2" align="center"><a href="orchestration.md">Orchestration</a></td>
|
||||
<td colspan="2" align="center"><a href="benchmark.md">Benchmarking</a></td>
|
||||
<td colspan="3" align="center"><a href="distributed.md">Distributed Compression</a></td>
|
||||
<td colspan="3" align="center"><a href="export.md">Model Export</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="9" align="center"><a href="migration.md">Code Migration from Intel® Neural Compressor 1.X to Intel® Neural Compressor 2.X</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="4" align="center"><a href="dataloader.md">DataLoader</a></td>
|
||||
<td colspan="5" align="center"><a href="metric.md">Metric</a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
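
As a minimal sketch of the `DataLoader` piece, the snippet below builds a dummy dataset and runs 2.X post-training quantization with it. It is a sketch under assumptions: the TensorFlow backend and the local `./mobilenet_v1.pb` model path are purely illustrative; see the DataLoader and Metric documents linked above for the full API.

```python
from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.data import DataLoader, Datasets
from neural_compressor.quantization import fit

# Dummy calibration data with the input shape the model expects.
dataset = Datasets("tensorflow")["dummy"](shape=(1, 224, 224, 3))
dataloader = DataLoader(framework="tensorflow", dataset=dataset)

q_model = fit(
    model="./mobilenet_v1.pb",  # hypothetical frozen-graph path
    conf=PostTrainingQuantConfig(),
    calib_dataloader=dataloader,
)
```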
|
||||
|
||||
## Advanced Topics
|
||||
This part covers advanced topics that help users dive deep into the Intel® Neural Compressor 2.X API.
|
||||
<table class="docutils">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td colspan="3" align="center"><a href="adaptor.md">Adaptor</a></td>
|
||||
<td colspan="3" align="center"><a href="tuning_strategies.md">Strategy</a></td>
|
||||
<td colspan="3" align="center"><a href="objective.md">Objective</a></td>
|
||||
<td colspan="3" align="center"><a href="calibration.md">Calibration</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="6" align="center"><a href="add_new_data_type.md">Add New Data Type</a></td>
|
||||
<td colspan="6" align="center"><a href="add_new_adaptor.md">Add New Adaptor</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="3" align="center"><a href="distillation_quantization.md">Distillation for Quantization</a></td>
|
||||
<td colspan="3" align="center"><a href="smooth_quant.md">SmoothQuant</a></td>
|
||||
<td colspan="3" align="center"><a href="quantization_weight_only.md">Weight-Only Quantization</a></td>
|
||||
<td colspan="3" align="center"><a href="quantization_layer_wise.md">Layer-Wise Quantization</a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
@@ -0,0 +1,42 @@
|
||||
Dynamic Quantization
|
||||
===============
|
||||
|
||||
1. [Introduction](#introduction)
|
||||
2. [Getting Started with Dynamic Quantization](#getting-started-with-dynamic-quantization)
|
||||
3. [Examples](#examples)
|
||||
|
||||
|
||||
## Introduction
|
||||
Quantization is the process of converting floating point weights and activations to lower bitwidth tensors by multiplying the floating point values by a scale factor and rounding the results to whole numbers. Dynamic quantization determines the scale factor for activations dynamically, based on the data range observed at runtime. We support W8A8 (quantizing weights and activations into 8 bits) dynamic quantization by leveraging PyTorch's [`X86InductorQuantizer`](https://pytorch.org/tutorials/prototype/pt2e_quant_x86_inductor.html?highlight=x86inductorquantizer).
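
To make the scale-factor idea concrete, here is a small illustrative sketch (not the library's implementation) of a symmetric per-tensor INT8 dynamic scheme; the kernels produced by `X86InductorQuantizer` handle this internally.

```python
import torch


def dynamic_quant_int8(x: torch.Tensor):
    """Toy symmetric int8 dynamic quantization: the scale is derived at runtime."""
    scale = x.abs().max().clamp(min=1e-12) / 127.0  # data range observed at runtime
    q = torch.clamp(torch.round(x / scale), -128, 127).to(torch.int8)
    return q, scale


x = torch.randn(4, 8)
q, scale = dynamic_quant_int8(x)
x_hat = q.float() * scale  # dequantized approximation of x
```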
|
||||
|
||||
|
||||
## Getting Started with Dynamic Quantization
|
||||
There are four steps to perform W8A8 dynamic quantization: `export`, `prepare`, `convert` and `compile`.
|
||||
|
||||
```python
|
||||
import torch
|
||||
from neural_compressor.torch.export import export
|
||||
from neural_compressor.torch.quantization import DynamicQuantConfig, prepare, convert
|
||||
|
||||
# Prepare the float model and example inputs for export model
|
||||
model = UserFloatModel()
|
||||
example_inputs = ...
|
||||
|
||||
# Export eager model into FX graph model
|
||||
exported_model = export(model=model, example_inputs=example_inputs)
|
||||
# Quantize the model
|
||||
quant_config = DynamicQuantConfig()
|
||||
prepared_model = prepare(exported_model, quant_config=quant_config)
|
||||
q_model = convert(prepared_model)
|
||||
# Compile the quantized model and replace the Q/DQ pattern with Q-operator
|
||||
from torch._inductor import config
|
||||
|
||||
config.freezing = True
|
||||
opt_model = torch.compile(q_model)
|
||||
```
|
||||
|
||||
> Note: The `set_local` of `DynamicQuantConfig` will be supported after the torch 2.4 release.
|
||||
|
||||
|
||||
## Examples
|
||||
Examples will be added later.
|
@@ -0,0 +1,267 @@
|
||||
FP8 Quantization
|
||||
=======
|
||||
|
||||
1. [Introduction](#introduction)
|
||||
2. [Supported Parameters](#supported-parameters)
|
||||
3. [Get Started with FP8 Quantization](#get-started-with-fp8-quantization)
|
||||
4. [Optimum-habana LLM example](#optimum-habana-llm-example)
|
||||
5. [VLLM example](#vllm-example)
|
||||
|
||||
## Introduction
|
||||
|
||||
Floating point 8 (FP8) is a promising data type for low-precision quantization; it provides a data distribution that is completely different from INT8, as shown below.
|
||||
|
||||
<div align="center">
|
||||
<img src="./imgs/fp8_dtype.png" height="250"/>
|
||||
</div>
|
||||
|
||||
Intel Gaudi2, also known as HPU, provides this data type capability for low-precision quantization, which includes `E4M3` and `E5M2`. For more information about these two data types, please refer to [link](https://arxiv.org/abs/2209.05433).
|
||||
|
||||
Intel Neural Compressor provides general quantization APIs to leverage the HPU FP8 capability, producing an 8-bit model with lower memory usage and lower compute cost.
|
||||
|
||||
## Supported Parameters
|
||||
|
||||
<table class="tg"><thead>
|
||||
<tr>
|
||||
<th class="tg-fymr">Attribute</th>
|
||||
<th class="tg-fymr">Description</th>
|
||||
<th class="tg-fymr">Values</th>
|
||||
</tr></thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td class="tg-0pky">fp8_config</td>
|
||||
<td class="tg-0pky">The target data type of FP8 quantization.</td>
|
||||
<td class="tg-0pky">E4M3 (default) - As Fig. 2<br>E5M2 - As Fig. 1.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-0pky">hp_dtype</td>
|
||||
<td class="tg-0pky">The high precision data type of non-FP8 operators.</td>
|
||||
<td class="tg-0pky">bf16 (default) - torch.bfloat16<br>fp16 - torch.float16.<br>fp32 - torch.float32.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-0pky">observer</td>
|
||||
<td class="tg-0pky">The observer to measure the statistics.</td>
|
||||
<td class="tg-0pky">maxabs (default), saves all tensors to files.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-0pky">allowlist</td>
|
||||
<td class="tg-0pky">List of nn.Module names or types to quantize. When setting an empty list, all the supported modules will be quantized by default. See Supported Modules. Not setting the list at all is not recommended as it will set the allowlist to these modules only: torch.nn.Linear, torch.nn.Conv2d, and BMM.</td>
|
||||
<td class="tg-0pky">Default = {'names': [], 'types': <span title=["Matmul","Linear","FalconLinear","KVCache","Conv2d","LoRACompatibleLinear","LoRACompatibleConv","Softmax","ModuleFusedSDPA","LinearLayer","LinearAllreduce","ScopedLinearAllReduce","LmHeadLinearAllreduce"]>FP8_WHITE_LIST}</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-0pky">blocklist</td>
|
||||
<td class="tg-0pky">List of nn.Module names or types not to quantize. Defaults to empty list, so you may omit it from the config file.</td>
|
||||
<td class="tg-0pky">Default = {'names': [], 'types': ()}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-0pky">mode</td>
|
||||
<td class="tg-0pky">The mode, measure or quantize, to run HQT with.</td>
|
||||
<td class="tg-0pky">MEASURE - Measure statistics of all modules and emit the results to dump_stats_path.<br>QUANTIZE - Quantize and run the model according to the provided measurements.<br>AUTO (default) - Select from [MEASURE, QUANTIZE] automatically.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-0pky">dump_stats_path</td>
|
||||
<td class="tg-0pky">The path to save and load the measurements. The path is created up until the level before last "/". The string after the last / will be used as prefix to all the measurement files that will be created.</td>
|
||||
<td class="tg-0pky">Default = "./hqt_output/measure"</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-0pky">scale_method</td>
|
||||
<td class="tg-0pky">The method for calculating the scale from the measurement.</td>
|
||||
<td class="tg-0pky">- unit_scale - Always use scale of 1.<br>- hw_aligned_single_scale - Always use scale that's aligned to the corresponding HW accelerated scale.<br>- maxabs_hw (default) - Scale is calculated to stretch/compress the maxabs measurement to the full-scale of FP8 and then aligned to the corresponding HW accelerated scale.<br>- maxabs_pow2 - Scale is calculated to stretch/compress the maxabs measurement to the full-scale of FP8 and then rounded to the power of 2.<br>- maxabs_hw_opt_weight - Scale of model params (weights) is chosen as the scale that provides minimal mean-square-error between quantized and non-quantized weights, from all possible HW accelerated scales. Scale of activations is calculated the same as maxabs_hw.<br>- act_maxabs_pow2_weights_pcs_opt_pow2 - Scale of model params (weights) is calculated per-channel of the params tensor. The scale per-channel is calculated the same as maxabs_hw_opt_weight. Scale of activations is calculated the same as maxabs_pow2.<br>- act_maxabs_hw_weights_pcs_maxabs_pow2 - Scale of model params (weights) is calculated per-channel of the params tensor. The scale per-channel is calculated the same as maxabs_pow2. Scale of activations is calculated the same as maxabs_hw.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-0pky">measure_exclude</td>
|
||||
<td class="tg-0pky">If this attribute is not defined, the default is OUTPUT. Since most models do not require measuring output tensors, you can exclude it to speed up the measurement process.</td>
|
||||
<td class="tg-0pky">NONE - All tensors are measured.<br>OUTPUT (default) - Excludes measurement of output tensors.</td>
|
||||
</tr>
|
||||
</tbody></table>
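
As a quick orientation, the sketch below shows how a few of the attributes above could be passed to `FP8Config`. It is a minimal sketch, assuming `FP8Config` accepts these attributes as keyword arguments; consult the API reference of your release for the exact signature.

```python
from neural_compressor.torch.quantization import FP8Config, convert, prepare

# Assumed keyword arguments mirroring the table above; values shown are the defaults.
qconfig = FP8Config(
    fp8_config="E4M3",                       # target FP8 data type
    observer="maxabs",                       # observer used to collect statistics
    scale_method="maxabs_hw",                # how scales are derived from measurements
    dump_stats_path="./hqt_output/measure",  # where measurement files are saved/loaded
)

# model = prepare(model, qconfig)   # measurement / calibration pass
# model = convert(model)            # quantize using the collected statistics
```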
|
||||
|
||||
## Get Started with FP8 Quantization
|
||||
[Demo Usage](https://github.com/intel/neural-compressor?tab=readme-ov-file#getting-started)
|
||||
[Computer vision example](../../../examples/3.x_api/pytorch/cv/fp8_quant)
|
||||
|
||||
## Optimum-habana LLM example
|
||||
### Overview
|
||||
[Optimum](https://huggingface.co/docs/optimum) is an extension of Transformers that provides a set of performance optimization tools to train and run models on targeted hardware with maximum efficiency.
|
||||
[Optimum-habana](https://github.com/huggingface/optimum-habana) is the interface between the Transformers and Diffusers libraries and Intel Gaudi AI Accelerators (HPU). It provides higher performance based on modified modeling files and utilizes Intel Neural Compressor for FP8 quantization internally; see [running-with-fp8](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation#running-with-fp8).
|
||||

|
||||
### Installation
|
||||
Refer to [optimum-habana, install-the-library-and-get-example-scripts](https://github.com/huggingface/optimum-habana?tab=readme-ov-file#install-the-library-and-get-example-scripts)
|
||||
Optionally, install from source:
|
||||
```
|
||||
$ git clone https://github.com/huggingface/optimum-habana
|
||||
$ cd optimum-habana && git checkout v1.14.0  # change the version as needed
|
||||
$ pip install -e .
|
||||
$ pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0
|
||||
$ cd examples/text-generation
|
||||
$ pip install -r requirements.txt
|
||||
$ pip install -r requirements_lm_eval.txt  # optional
|
||||
```
|
||||
### Check neural_compressor code
|
||||
> optimum-habana/examples/text-generation/utils.py
|
||||
>> initialize_model() -> setup_model() -> setup_quantization() -> FP8Config/prepare()/convert()
|
||||
|
||||
### FP8 KV cache
|
||||
Introduction: [kv-cache-quantization in huggingface transformers](https://huggingface.co/blog/kv-cache-quantization)
|
||||
|
||||
BF16 KVCache Code -> [Modeling_all_models.py -> KVCache()](https://github.com/huggingface/optimum-habana/blob/main/optimum/habana/transformers/models/modeling_all_models.py)
|
||||
|
||||
FP8 KVCache code trace with neural compressor support, for example Llama models,
|
||||
> optimum-habana/optimum/habana/transformers/models/llama/modeling_llama.py
|
||||
>> GaudiLlamaForCausalLM() -> self.model()
|
||||
>>> GaudiLlamaModel() -> forward() -> decoder_layer() -> GaudiLlamaDecoderLayer() forward() -> pre_attn() -> pre_attn_forward() -> self.k_cache.update
|
||||
|
||||
> neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py
|
||||
>> PatchedKVCache() -> update()
|
||||
>> PatchedModuleFusedSDPA()
|
||||
|
||||
Models list which support FP8 KV Cache,
|
||||
```
|
||||
microsoft/Phi-3-mini-4k-instruct
|
||||
bigcode/starcoder2-3b
|
||||
Qwen/Qwen2.5-7B-Instruct
|
||||
meta-llama/Llama-3.2-3B-Instruct
|
||||
tiiuae/falcon-7b-instruct
|
||||
mistralai/Mixtral-8x7B-Instruct-v0.1
|
||||
EleutherAI/gpt-j-6b
|
||||
mistralai/Mistral-Nemo-Instruct-2407
|
||||
...
|
||||
```
|
||||
|
||||
### Running with FP8
|
||||
Refer to [here](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation#running-with-fp8).
|
||||
Change "--model_name_or_path" to be your model like
|
||||
"meta-llama/Llama-3.1-8B-Instruct",
|
||||
"Qwen/Qwen2.5-7B-Instruct", or
|
||||
"mistralai/Mixtral-8x7B-Instruct-v0.1" and so on.
|
||||
"--use_kv_cache" is to enable FP8 KV cache.
|
||||
|
||||
### Profiling
|
||||
Add "--profiling_warmup_steps 5 --profiling_steps 2 --profiling_record_shapes" as args in the end of commandline of run_generation.py.
|
||||
Refer to [torch.profiler.ProfilerActivity.HPU](https://github.com/huggingface/optimum-habana/blob/c9e1c23620618e2f260c92c46dfeb163545ec5ba/optimum/habana/utils.py#L305).
|
||||
|
||||
### FP8 Accuracy
|
||||
"lm_eval.tasks", "lm_eval.evaluator", "lm_eval" are installed from the above requirements_lm_eval.txt. The tasks can be set and the default is ["hellaswag", "lambada_openai", "piqa", "winogrande"], [more info](https://github.com/EleutherAI/lm-evaluation-harness/)
|
||||
|
||||
| `Llama-2-7b-hf`| fp8 & fp8 KVCache| bf16 w/ bf16 KVCache|
|
||||
|---------------|---------|--------|
|
||||
| hellaswag | 0.5691097390957977 | 0.5704043019318861 |
|
||||
| lambada_openai| 0.7360760721909567 | 0.7372404424607025 |
|
||||
| piqa | 0.7850924918389554 | 0.7818280739934712 |
|
||||
| winogrande | 0.6929755327545383 | 0.6929755327545383 |
|
||||
|
||||
| `Qwen2.5-7B-Instruct`| fp8 & fp8 KVCache| bf16 w/ bf16 KVCache|
|
||||
|---------------|---------|--------|
|
||||
| hellaswag | 0.2539334793865764 | 0.2539334793865764 |
|
||||
| lambada_openai| 0.0 | 0.0 |
|
||||
| piqa | 0.5391730141458106 | 0.5391730141458106 |
|
||||
| winogrande | 0.4956590370955012 | 0.4956590370955012 |
|
||||
|
||||
| `Llama-3.1-8B-Instruct`| fp8 & fp8 KVCache| bf16 w/ bf16 KVCache|
|
||||
|---------------|---------|--------|
|
||||
| hellaswag | 0.5934076877116112 | 0.5975901214897431 |
|
||||
| lambada_openai| 0.7230739375121289 | 0.7255967397632447 |
|
||||
| piqa | 0.7932535364526659 | 0.8030467899891186 |
|
||||
| winogrande | 0.7434885556432518 | 0.7371744277821626 |
|
||||
|
||||
|
||||
| `Mixtral-8x7B-Instruct-v0.1`| fp8 & fp8 KVCache| bf16 w/ bf16 KVCache|
|
||||
|---------------|---------|--------|
|
||||
| hellaswag | 0.25323640709022105 | 0.25323640709022105 |
|
||||
| lambada_openai| 0.0 | 0.0 |
|
||||
| piqa | 0.528835690968444 | 0.528835690968444 |
|
||||
| winogrande | 0.4956590370955012 | 0.4956590370955012 |
|
||||
|
||||
## VLLM example
|
||||
### Overview
|
||||

|
||||
|
||||
### Installation
|
||||
Refer to [Habana vllm-fork](https://github.com/HabanaAI/vllm-fork) to install.
|
||||
Optionally, install `vllm-hpu-extension`, `neural_compressor`, and `vllm` from source:
|
||||
```
|
||||
$ git clone https://github.com/HabanaAI/vllm-fork.git
|
||||
$ cd vllm-fork
|
||||
$ pip install -r requirements-hpu.txt
|
||||
$ python setup.py develop --user
|
||||
|
||||
## Check
|
||||
$ pip list |grep vllm
|
||||
vllm 0.6.3.dev1122+g2f43ebf5.d20241121.gaudi118 /home/fengding/vllm-fork
|
||||
vllm-hpu-extension 0.1
|
||||
|
||||
## Validation
|
||||
$ VLLM_SKIP_WARMUP=true python3 examples/offline_inference.py
|
||||
......
|
||||
Prompt: 'Hello, my name is', Generated text: ' Kelly and I have a job to do.\nI need someone to come over'
|
||||
Prompt: 'The president of the United States is', Generated text: ' facing a sharp criticism of his handling of the coronavirus pandemic, including'
|
||||
Prompt: 'The capital of France is', Generated text: ' the capital of the Socialist Party of France (SPF), with its state-'
|
||||
Prompt: 'The future of AI is', Generated text: " in what's coming, not what's coming.\nI don't know what"
|
||||
```
|
||||
|
||||
### Run FP8 calibration
|
||||
Refer to [vllm-hpu-extension->calibration](https://github.com/HabanaAI/vllm-hpu-extension/tree/main/calibration)
|
||||
```
|
||||
$ git clone https://github.com/HabanaAI/vllm-hpu-extension
|
||||
$ cd vllm-hpu-extension/calibration
|
||||
|
||||
# For Llama-3.1.8B-Instruct
|
||||
$ ./calibrate_model.sh -m meta-llama/Llama-3.1-8B-Instruct -d /home/fengding/processed-data.pkl -o ./output_llama3.1.8b.Instruct -b 128 -t 1 -l 128
|
||||
## Generate scale factors in ./output_llama3.1.8b.Instruct
|
||||
```
|
||||
|
||||
### Start vllm server
|
||||
```
|
||||
$ cd vllm-fork/
|
||||
|
||||
$ PT_HPU_ENABLE_LAZY_COLLECTIVES=true \
|
||||
PT_HPU_WEIGHT_SHARING=0 \
|
||||
VLLM_CONTIGUOUS_PA=true \
|
||||
VLLM_SKIP_WARMUP=true \
|
||||
QUANT_CONFIG=output_llama3.1.8b.Instruct/maxabs_quant_g2.json \
|
||||
python3 -m vllm.entrypoints.openai.api_server \
|
||||
--model meta-llama/Llama-3.1-8B-Instruct \
|
||||
--port 8080 \
|
||||
--gpu-memory-utilization 0.9 \
|
||||
--tensor-parallel-size 1 \
|
||||
--disable-log-requests \
|
||||
--block-size 128 \
|
||||
--quantization inc \
|
||||
--kv-cache-dtype fp8_inc \
|
||||
--device hpu \
|
||||
--weights-load-device cpu \
|
||||
--dtype bfloat16 \
|
||||
--num_scheduler_steps 16 2>&1 > vllm_serving.log &
|
||||
```
|
||||
Refer to [vllm-fork->README_GAUDI.md](https://github.com/HabanaAI/vllm-fork/blob/habana_main/README_GAUDI.md) for more details.
|
||||
|
||||
### Start client to test
|
||||
```
|
||||
$ curl --noproxy "*" http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{"model": "meta-llama/Llama-3.1-8B-Instruct", "prompt": "San Francisco is a", "max_tokens": 100}'
|
||||
```
|
||||
|
||||
### Run benchmark
|
||||
```
|
||||
python benchmarks/benchmark_serving.py \
|
||||
--backend vllm \
|
||||
--model meta-llama/Llama-3.1-8B-Instruct \
|
||||
--dataset-name sonnet \
|
||||
--dataset-path benchmarks/sonnet.txt \
|
||||
--request-rate 128 \
|
||||
--num-prompts 128 \
|
||||
--port 8080 \
|
||||
--sonnet-input-len 128 \
|
||||
--sonnet-output-len 128 \
|
||||
--sonnet-prefix-len 100
|
||||
```
|
||||
|
||||
### FP8 KV cache
|
||||
Code trace
|
||||
> vllm-fork/vllm/attention/backends/hpu_attn.py
|
||||
>> from vllm_hpu_extension.utils import Matmul, Softmax, VLLMKVCache
|
||||
>> HPUAttentionImpl() -> self.k_cache() / self.v_cache()
|
||||
|
||||
> neural_compressor/torch/algorithms/fp8_quant/_quant_common/helper_modules.py
|
||||
>> PatchedVLLMKVCache()
|
||||
|
||||
> neural_compressor/torch/algorithms/fp8_quant/common.py
|
||||
>> "VLLMKVCache": ModuleInfo("kv_cache", PatchedVLLMKVCache)
|
@@ -0,0 +1,107 @@
|
||||
Microscaling Quantization
|
||||
===============
|
||||
|
||||
1. [Introduction](#introduction)
|
||||
2. [Get Started with Microscaling Quantization API](#get-started-with-microscaling-quantization-api)
|
||||
3. [Examples](#examples)
|
||||
4. [Reference](#reference)
|
||||
|
||||
## Introduction
|
||||
|
||||
Numerous breakthroughs have emerged across various fields, such as text analysis, language translation and chatbot technologies, fueled by the development of large language models (LLMs). Nevertheless, their increasing power comes with the challenge of explosive growth in parameters, posing obstacles for practical use. To balance memory limits and accuracy preservation for AI models, the Microscaling (MX) specification was promoted from the well-known Microsoft Floating Point (MSFP) data type [1, 2]:
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<th>Format Name</th>
|
||||
<th>Element Data type</th>
|
||||
<th>Element Bits</th>
|
||||
<th>Scaling Block Size</th>
|
||||
<th>Scale Data Type</th>
|
||||
<th>Scale Bits</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td rowspan="2">MXFP8</td>
|
||||
<td>FP8 (E5M2)</td>
|
||||
<td rowspan="2">8</td>
|
||||
<td rowspan="2">32</td>
|
||||
<td rowspan="2">E8M0</td>
|
||||
<td rowspan="2">8</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>FP8 (E4M3)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td rowspan="2">MXFP6</td>
|
||||
<td>FP6 (E3M2)</td>
|
||||
<td rowspan="2">6</td>
|
||||
<td rowspan="2">32</td>
|
||||
<td rowspan="2">E8M0</td>
|
||||
<td rowspan="2">8</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>FP6 (E2M3)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>MXFP4</td>
|
||||
<td>FP4 (E2M1)</td>
|
||||
<td>4</td>
|
||||
<td>32</td>
|
||||
<td>E8M0</td>
|
||||
<td>8</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>MXINT8</td>
|
||||
<td>INT8</td>
|
||||
<td>8</td>
|
||||
<td>32</td>
|
||||
<td>E8M0</td>
|
||||
<td>8</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
|
||||
At an equivalent accuracy level, the MX data type demonstrates the ability to occupy a smaller area and incur lower energy costs for multiply-accumulate compared to other conventional data types on the same silicon [1].
|
||||
|
||||
Neural Compressor seamlessly applies the MX data type to post-training quantization, offering meticulously crafted recipes to empower users to quantize LLMs without sacrificing accuracy. The workflow is shown as below.
|
||||
|
||||
<a target="_blank" href="./imgs/mx_workflow.png" text-align:left>
|
||||
<left>
|
||||
<img src="./imgs/mx_workflow.png" alt="Workflow of MX Quant (source [3])" height=120>
|
||||
</left>
|
||||
</a>
|
||||
|
||||
The memory and computational limits of LLMs are more severe than for other general neural networks, so our exploration focuses on LLMs first. The following table shows the basic MX quantization recipes in Neural Compressor and enumerates distinctions among various data types. The MX data type replaces the general float scale with powers of two to be more hardware-friendly. It adopts a granularity falling between per-channel and per-tensor to balance accuracy and memory consumption.
|
||||
|
||||
| | MX Format | INT8 | FP8 |
|
||||
|------------|--------------|------------|------------|
|
||||
| Scale | $2^{exp}$ | $\frac{MAX}{amax}$ | $\frac{MAX}{amax}$ |
|
||||
| Zero point | 0 (None) | $2^{bits - 1}$ or $-min * scale$ | 0 (None) |
|
||||
| Granularity | per-block (default blocksize is 32) | per-channel or per-tensor | per-channel or per-tensor |
|
||||
|
||||
The exponent (exp) is equal to `torch.floor(torch.log2(amax))`, MAX is the representation range of the data type, amax is the maximum absolute value of the per-block tensor, and min is the minimum value of the tensor being quantized.
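
A minimal sketch of the per-block power-of-two (E8M0) scale derivation described above may help; this is only an illustration with a randomly generated block, not Neural Compressor's internal kernel, and the block size of 32 follows the table above.

```python
import torch

# Minimal sketch of the per-block MX scale described above (illustration only).
block = torch.randn(32)              # one scaling block (default block size is 32)
amax = block.abs().max()             # max absolute value within the block
exp = torch.floor(torch.log2(amax))  # shared exponent
scale = 2.0 ** exp                   # power-of-two scale (E8M0)
scaled = block / scale               # elements are then cast to the low-bit element type, e.g. FP4 (E2M1)
```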
|
||||
|
||||
|
||||
## Get Started with Microscaling Quantization API
|
||||
|
||||
To get a model quantized with Microscaling Data Types, users can use the Microscaling Quantization API as follows.
|
||||
|
||||
```python
|
||||
from neural_compressor.torch.quantization import MXQuantConfig, prepare, convert
|
||||
|
||||
# `args` refers to the command-line arguments of the example script (e.g., parsed via argparse)
quant_config = MXQuantConfig(w_dtype=args.w_dtype, act_dtype=args.act_dtype, weight_only=args.woq)
|
||||
user_model = prepare(model=user_model, quant_config=quant_config)
|
||||
user_model = convert(model=user_model)
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
- PyTorch [huggingface models](/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant)
|
||||
|
||||
|
||||
## Reference
|
||||
|
||||
[1]: Darvish Rouhani, Bita, et al. "Pushing the limits of narrow precision inferencing at cloud scale with microsoft floating point." Advances in neural information processing systems 33 (2020): 10271-10281
|
||||
|
||||
[2]: OCP Microscaling Formats (MX) Specification
|
||||
|
||||
[3]: Rouhani, Bita Darvish, et al. "Microscaling Data Formats for Deep Learning." arXiv preprint arXiv:2310.10537 (2023).
|
@@ -0,0 +1,111 @@
|
||||
PyTorch Mixed Precision
|
||||
========================================
|
||||
|
||||
1. [Introduction](#introduction)
|
||||
2. [Mixed Precision Support Matrix](#mixed-precision-support-matrix)
|
||||
3. [Get Started with autotune API](#get-started-with-autotune-api)
|
||||
4. [Examples](#examples)
|
||||
|
||||
## Introduction
|
||||
|
||||
The recent growth of Deep Learning has driven the development of more complex models that require significantly more compute and memory capabilities. Several low precision numeric formats have been proposed to address the problem.
|
||||
Google's [bfloat16](https://cloud.google.com/tpu/docs/bfloat16) and the IEEE [FP16](https://en.wikipedia.org/wiki/Half-precision_floating-point_format) half-precision format are two of the most widely used 16-bit formats. [Mixed precision](https://arxiv.org/abs/1710.03740) training and inference using low precision formats have been developed to reduce compute and bandwidth requirements.
|
||||
|
||||
The 3rd Gen Intel® Xeon® Scalable processor (codenamed Cooper Lake), featuring Intel® Deep Learning Boost, is the first general-purpose x86 CPU to support the bfloat16 format. Specifically, three new bfloat16 instructions are added as a part of the AVX512_BF16 extension within Intel Deep Learning Boost: VCVTNE2PS2BF16, VCVTNEPS2BF16, and VDPBF16PS. The first two instructions allow converting to and from bfloat16 data type, while the last one performs a dot product of bfloat16 pairs.
|
||||
Further details can be found in the [Hardware Numerics Document](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-deep-learning-boost-new-instruction-bfloat16.html) published by Intel.
|
||||
|
||||
The 4th Gen Intel® Xeon® Scalable processor supports FP16 instruction set architecture (ISA) for Intel® Advanced Vector Extensions 512 (Intel® AVX-512). The new ISA supports a wide range of general-purpose numeric operations for 16-bit half-precision IEEE-754 floating-point and complements the existing 32-bit and 64-bit floating-point instructions already available in the Intel Xeon processor based products.
|
||||
Further details can be found in the [Intel AVX512 FP16 Guide](https://www.intel.com/content/www/us/en/content-details/669773/intel-avx-512-fp16-instruction-set-for-intel-xeon-processor-based-products-technology-guide.html) published by Intel.
|
||||
|
||||
The latest Intel Xeon processors deliver the flexibility of Intel Advanced Matrix Extensions (Intel AMX), an accelerator that improves the performance of deep learning (DL) training and inference, making it ideal for workloads like NLP, recommender systems, and image recognition. Developers can code AI functionality to take advantage of the Intel AMX instruction set, and they can code non-AI functionality to use the processor instruction set architecture (ISA). Intel has integrated the Intel® oneAPI Deep Neural Network Library (oneDNN), its oneAPI DL engine, into PyTorch.
|
||||
Further details can be found in the [Intel AMX Document](https://www.intel.com/content/www/us/en/content-details/785250/accelerate-artificial-intelligence-ai-workloads-with-intel-advanced-matrix-extensions-intel-amx.html) published by Intel.
|
||||
|
||||
<p align="center" width="100%">
|
||||
<img src="./imgs/data_format.png" alt="Architecture" height=230>
|
||||
</p>
|
||||
|
||||
## Mixed Precision Support Matrix
|
||||
|
||||
<table class="center">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Framework</th>
|
||||
<th>Backend</th>
|
||||
<th>Backend Library</th>
|
||||
<th>Backend Value</th>
|
||||
<th>Supported Device (CPU as default)</th>
|
||||
<th>Support BF16</th>
|
||||
<th>Support FP16</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td rowspan="1" align="left">PyTorch</td>
|
||||
<td align="left">FX</td>
|
||||
<td align="left">FBGEMM</td>
|
||||
<td align="left">"default"</td>
|
||||
<td align="left">cpu</td>
|
||||
<td align="left">✔</td>
|
||||
<td align="left">✔</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
|
||||
### Hardware and Software requirements for **BF16**
|
||||
- PyTorch
|
||||
1. Hardware: CPU with the `avx512_bf16` instruction set (a quick check is sketched after this list).
|
||||
2. Software: torch >= [1.11.0](https://download.pytorch.org/whl/torch_stable.html).
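
A quick way to verify the hardware requirement on Linux is to look for the flag in `/proc/cpuinfo`; this is a generic sketch, not part of Neural Compressor.

```python
# Quick Linux-only check for the avx512_bf16 CPU flag (use "avx512_fp16" for the FP16 case below).
with open("/proc/cpuinfo") as f:
    print("avx512_bf16 supported:", "avx512_bf16" in f.read())
```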
|
||||
|
||||
|
||||
### Hardware and Software requirements for **FP16**
|
||||
- PyTorch
|
||||
1. Hardware: CPU supports `avx512_fp16` instruction set.
|
||||
2. Software: torch >= [1.11.0](https://download.pytorch.org/whl/torch_stable.html).
|
||||
> Note: To run FP16 on Intel AMX, please set the environment variable `ONEDNN_MAX_CPU_ISA`:
|
||||
> ```export ONEDNN_MAX_CPU_ISA=AVX512_CORE_AMX_FP16```
|
||||
|
||||
|
||||
|
||||
### Accuracy-driven mixed precision
|
||||
BF16/FP16 conversion may lead to an accuracy drop. Intel® Neural Compressor provides an accuracy-driven tuning function to reduce the accuracy loss, which can fall back converted ops to FP32, if configured, to get better accuracy. To enable this function, users only need to provide `eval_fn` and `eval_args` for `autotune`.
Note that the IPEX backend does not support accuracy-driven mixed precision.
|
||||
|
||||
## Get Started with autotune API
|
||||
|
||||
To get a bf16/fp16 model, users can use the `autotune` interface with `MixedPrecisionConfig` as follows.
|
||||
|
||||
- BF16:
|
||||
|
||||
```python
|
||||
from neural_compressor.torch.quantization import MixedPrecisionConfig, TuningConfig, autotune
|
||||
|
||||
def eval_acc_fn(model):
|
||||
...  # evaluate the model and compute accuracy
|
||||
return acc
|
||||
|
||||
# modules might fall back to fp32 to get better accuracy
|
||||
custom_tune_config = TuningConfig(config_set=[MixedPrecisionConfig(dtype=["bf16", "fp32"])], max_trials=3)
|
||||
best_model = autotune(model=build_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn)
|
||||
```
|
||||
|
||||
- FP16:
|
||||
|
||||
```python
|
||||
from neural_compressor.torch.quantization import MixedPrecisionConfig, TuningConfig, autotune
|
||||
|
||||
def eval_acc_fn(model):
|
||||
...  # evaluate the model and compute accuracy
|
||||
return acc
|
||||
|
||||
# modules might fall back to fp32 to get better accuracy
|
||||
custom_tune_config = TuningConfig(config_set=[MixedPrecisionConfig(dtype=["fp16", "fp32"])], max_trials=3)
|
||||
best_model = autotune(model=build_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn)
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/cv/mixed_precision) on how to quantize a model with mixed precision.
|
@@ -0,0 +1,112 @@
|
||||
PyTorch Smooth Quantization
|
||||
========================================
|
||||
|
||||
1. [Introduction](#Introduction)
|
||||
2. [Usage](#Usage)
|
||||
3. [Validated Models](#Validated-Models)
|
||||
4. [Supported Framework Matrix](#Supported-Framework-Matrix)
|
||||
|
||||
|
||||
## Introduction
|
||||
Quantization is a common compression operation that reduces memory and accelerates inference by converting the floating-point matrix to an integer matrix. For large language models (LLMs) with gigantic parameters, the systematic outliers make quantization of activations difficult. [SmoothQuant](https://arxiv.org/abs/2211.10438), a training-free post-training quantization (PTQ) solution, offline migrates this difficulty from activations to weights with a mathematically equivalent transformation.
|
||||
|
||||
|
||||
## Usage
|
||||
### Fixed Alpha
|
||||
To set a fixed alpha for the entire model, users can follow this example:
|
||||
|
||||
```python
|
||||
from neural_compressor.torch.quantization import SmoothQuantConfig, convert, prepare
|
||||
|
||||
|
||||
def run_fn(model):
|
||||
model(example_inputs)
|
||||
|
||||
|
||||
quant_config = SmoothQuantConfig(alpha=0.5)
|
||||
prepared_model = prepare(fp32_model, quant_config=quant_config, example_inputs=example_inputs)
|
||||
run_fn(prepared_model)
|
||||
q_model = convert(prepared_model)
|
||||
```
|
||||
`SmoothQuantConfig` description:
|
||||
|
||||
`alpha`: a smooth factor to calculate the conversion per-channel scale and balance the quantization difficulty of activation and weight. Float value, default is 0.5.
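
For intuition, the per-channel conversion scale in the SmoothQuant paper takes the form $s_j = \max(|X_j|)^{\alpha} / \max(|W_j|)^{1-\alpha}$. The sketch below only illustrates the role of `alpha`; it is not Neural Compressor's internal implementation, and the tensor shapes are assumed purely for the example.

```python
import torch

alpha = 0.5
X = torch.randn(16, 64)  # calibration activations, shape (tokens, C_in); assumed for illustration
W = torch.randn(64, 32)  # linear weight laid out as (C_in, C_out); assumed for illustration

# Per input-channel smoothing scale: a larger alpha shifts more quantization difficulty onto the weights.
s = X.abs().amax(dim=0).pow(alpha) / W.abs().amax(dim=1).pow(1 - alpha)
X_s, W_s = X / s, W * s.unsqueeze(1)  # mathematically equivalent: X_s @ W_s == X @ W
```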
|
||||
|
||||
> **Note:** Alpha="auto" and alpha auto-tuning were supported in the old API; please stay tuned for the new API's support for auto alpha.
|
||||
|
||||
### Specify Quantization Rules
|
||||
Intel(R) Neural Compressor supports specifying quantization rules by operator type for smooth quantization. Users can use `set_local` to fall back an op type in `SmoothQuantConfig` to achieve this.
|
||||
|
||||
Here we don't quantize `Linear` layers.
|
||||
```python
|
||||
# fallback by op_type
|
||||
quant_config.set_local("Linear", SmoothQuantConfig(w_dtype="fp32", act_dtype="fp32"))
|
||||
prepared_model = prepare(model, quant_config=quant_config, example_inputs=example_inputs)
|
||||
run_fn(prepared_model)
|
||||
q_model = convert(prepared_model)
|
||||
```
|
||||
|
||||
To get more information, please refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant).
|
||||
|
||||
|
||||
## Validated Models
|
||||
Neural Compressor: 2.1
|
||||
|
||||
IPEX (Intel Extension for PyTorch): 2.0/2.1
|
||||
|
||||
Dataset: lambada_openai
|
||||
|
||||
Task: text-generation provided by [ITREX](https://github.com/intel/intel-extension-for-transformers/tree/main/examples/huggingface/pytorch/text-generation/quantization)
|
||||
|
||||
Alpha in [0.4, 0.6] is the sweet-spot region reported in the SmoothQuant paper.
|
||||
|
||||
A list of models that achieved a <1% accuracy drop is shown below.
|
||||
|
||||
| Model/Last token accuracy | FP32 Accuracy | INT8 (w/ SmoothQuant) | Notes |
|
||||
|:----------:|:------:|:------:|-----------------------------------|
|
||||
| bigscience/bloom-560m | 0.354 | 0.3542 | alpha=0.5, Ipex 2.1 |
|
||||
| bigscience/bloom-1b7 | 0.4634 | 0.4936 | alpha=0.5, Ipex 2.0 |
|
||||
| bigscience/bloom-3b | 0.518 | 0.5185 | alpha=0.8, Ipex 2.1 |
|
||||
| bigscience/bloom-7b1 | 0.5764 | 0.5977 | alpha=0.5, Ipex 2.0 |
|
||||
| bigscience/bloomz-560m | 0.3947 | 0.3930 | alpha=0.8, Ipex 2.1 |
|
||||
| bigscience/bloomz-1b7 | 0.4828 | 0.4906 | alpha=0.5, Ipex 2.1 |
|
||||
| bigscience/bloomz-3b | 0.5018 | 0.4980 | alpha=0.5, Ipex 2.1 |
|
||||
| bigscience/bloomz-7b1 | 0.5593 | 0.5552 | alpha=0.5, Ipex 2.1 |
|
||||
| facebook/opt-125m | 0.379 | 0.3757 | alpha=0.5, Ipex 2.1 |
|
||||
| facebook/opt-350m | 0.4516 | 0.4533 | alpha=0.8, Ipex 2.1 |
|
||||
| facebook/opt-1.3b | 0.5789 | 0.5742 | alpha=0.8, Ipex 2.0 |
|
||||
| facebook/opt-2.7b | 0.6365 | 0.6404 | alpha=0.5, Ipex 2.0 |
|
||||
| facebook/opt-6.7b | 0.6769 | 0.6804 | alpha=0.5, Ipex 2.0 |
|
||||
| facebook/opt-13b | 0.6872 | 0.6814 | alpha=0.5, Ipex 2.1 |
|
||||
| facebook/opt-30b | 0.7149 | 0.7128 | alpha=0.5, Ipex 2.1 |
|
||||
| facebook/opt-66b | 0.7398 | 0.7326 | alpha=0.5, Ipex 2.1 |
|
||||
| LLaMa-7b | 0.7361 | 0.7357 | alpha=0.8, Ipex 2.1 |
|
||||
| LLaMa-13b | 0.7627 | 0.7590 | alpha=0.7, Ipex 2.1 |
|
||||
| LLaMa-30b | 0.7759 | 0.7840 | alpha=0.7, Ipex 2.1 |
|
||||
| LLaMa-65b | 0.7908 | 0.7957 | alpha=0.9, Ipex 2.1 |
|
||||
| EleutherAI/gpt-j-6B* | 0.6831 | 0.6821 | alpha=1.0, Ipex 2.1 |
|
||||
| MBZUAI/LaMini-GPT-124m | 0.3804 | 0.3887 | alpha=0.5, Ipex 2.1 |
|
||||
| MBZUAI/LaMini-GPT-774m | 0.5048 | 0.5057 | alpha=0.5, Ipex 2.1 |
|
||||
| MBZUAI/LaMini-GPT-1.5b | 0.5443 | 0.5436 | alpha=0.5, Ipex 2.1 |
|
||||
| mosaicml/mpt-7b-chat | 0.655 | 0.6499 | alpha=0.7, Ipex 2.1 |
|
||||
| stabilityai/stablelm-base-alpha-3b | 0.4172 | 0.4149 | alpha=0.6, Ipex 2.1 |
|
||||
| togethercomputer/RedPajama-INCITE-Base-3B-v1 | 0.6542 | 0.6735 | alpha=0.5, Ipex 2.1 |
|
||||
| togethercomputer/RedPajama-INCITE-Chat-3B-v1* | 0.6718 | 0.6740 | alpha=0.5, Ipex 2.0 |
|
||||
| togethercomputer/RedPajama-INCITE-Instruct-3B-v1* | 0.6569 | 0.6621 | alpha=0.5, Ipex 2.0 |
|
||||
| togethercomputer/RedPajama-INCITE-Base-7B-v0.1* | 0.7143 | 0.7221 | alpha=0.5, Ipex 2.0 |
|
||||
| togethercomputer/RedPajama-INCITE-Instruct-7B-v0.1* | 0.6895 | 0.6953 | alpha=0.5, Ipex 2.0 |
|
||||
| databricks/dolly-v1-6b* | 0.6866 | 0.6895 | alpha=0.8, Ipex 2.1 |
|
||||
| databricks/dolly-v2-3b* | 0.6297 | 0.6247 | alpha=0.5, Ipex 2.1 |
|
||||
| tiiuae/falcon-7b-instruct | 0.6437 | 0.6392 | alpha=0.7, Pytorch |
|
||||
|
||||
Please refer to the step-by-step [instruction](../../examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/ipex/README.md) for details.
|
||||
|
||||
Please note that for models marked with an asterisk (*), we set all add ops to FP32 during the quantization step to achieve desirable results.
|
||||
|
||||
|
||||
## Supported Framework Matrix
|
||||
|
||||
| Framework | Alpha | Folding |
|
||||
|:---------:|--------------|------------|
|
||||
| PyTorch | [0-1] | False |
|
||||
| IPEX | [0-1] | True / False (version > 2.1) |
|
@@ -0,0 +1,108 @@
|
||||
PyTorch Static Quantization
|
||||
========================================
|
||||
1. [Introduction](#introduction)
|
||||
2. [Get Started](#get-started) \
|
||||
2.1 [Static Quantization with IPEX Backend](#static-quantization-with-ipex-backend) \
|
||||
2.1.1 [Usage Sample with IPEX](#usage-sample-with-ipex) \
|
||||
2.1.2 [Specify Quantization Rules](#specify-quantization-rules) \
|
||||
2.1.3 [Model Examples](#model-examples) \
|
||||
2.2 [Static Quantization with PT2E Backend](#static-quantization-with-pt2e-backend) \
|
||||
2.2.1 [Usage Sample with PT2E](#usage-sample-with-pt2e) \
|
||||
2.2.2 [Model Examples with PT2E](#model-examples-with-pt2e)
|
||||
|
||||
|
||||
## Introduction
|
||||
|
||||
Post-Training Quantization (PTQ) is a technique used to convert a pre-trained floating-point model to a quantized model. This approach does not require model retraining. Instead, it uses calibration data to determine the optimal quantization parameters. Static quantization involves calibrating both weights and activations during the quantization process. Currently, we support two paths to perform static PTQ: [Intel Extension for PyTorch (IPEX)](https://github.com/intel/intel-extension-for-pytorch) and [PyTorch 2 Export Quantization (PT2E)](https://pytorch.org/tutorials/prototype/pt2e_quant_x86_inductor.html).
|
||||
|
||||
## Get Started
|
||||
|
||||
### Static Quantization with IPEX Backend
|
||||
|
||||
Intel Extension for PyTorch (IPEX) provides optimizations specifically for Intel hardware, improving the performance of PyTorch models through efficient execution on CPUs. IPEX supports PTQ, allowing users to quantize models to lower precision to reduce model size and inference time while maintaining accuracy.
|
||||
|
||||
The design philosophy of the quantization interface of Intel(R) Neural Compressor is ease of use. It requires the user to provide a `model`, a `calibration function`, and `example inputs`. These parameters are used to quantize and tune the model.
|
||||
|
||||
`model` is the framework model location or the framework model object.
|
||||
|
||||
`calibration function` is used to determine the appropriate quantization parameters, such as `scale` and `zero-point`, for the model's weights and activations. This process is crucial for minimizing the loss of accuracy that can occur when converting from floating-point to lower-precision format.
|
||||
|
||||
IPEX leverages just-in-time (JIT) compilation techniques for optimizing the model. `example inputs` is used to trace the computational graph of the model, enabling various optimizations and transformations that are specific to IPEX. This tracing process captures the operations performed by the model, allowing IPEX to apply quantization optimizations effectively. `example inputs` should be representative of the actual data the model will process to ensure accurate calibration.
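
For reference, `example_inputs` may be a single tensor, a tuple of positional inputs, or a dict of keyword inputs (as documented for the `prepare` API); the shapes below are placeholders chosen only for illustration.

```python
import torch

# example_inputs can take any of these forms (shapes are illustrative placeholders):
example_inputs = torch.randn(1, 3, 224, 224)                    # a single tensor
example_inputs = (torch.randint(0, 100, (1, 32)),)              # a tuple of positional inputs
example_inputs = {"input_ids": torch.randint(0, 100, (1, 32))}  # a dict of keyword inputs
```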
|
||||
|
||||
|
||||
#### Usage Sample with IPEX
|
||||
```python
|
||||
import intel_extension_for_pytorch as ipex
|
||||
from neural_compressor.torch.quantization import StaticQuantConfig, convert, prepare
|
||||
|
||||
quant_config = StaticQuantConfig(act_sym=True, act_algo="minmax")
|
||||
prepared_model = prepare(model, quant_config=quant_config, example_inputs=example_inputs)
|
||||
run_fn(prepared_model)  # user-defined calibration function that feeds representative data through the model
|
||||
q_model = convert(prepared_model)
|
||||
```
|
||||
|
||||
> [!IMPORTANT]
|
||||
> To use static quantization with the IPEX backend, please explicitly import IPEX at the beginning of your program.
|
||||
|
||||
#### Specify Quantization Rules
|
||||
Intel(R) Neural Compressor supports specifying quantization rules by operator name or operator type. Users can use `set_local` to fall back either an `op_name` or an `op_type` in `StaticQuantConfig` to achieve this.
|
||||
|
||||
1. Example of `op_name_dict`
|
||||
Here we don't quantize the layer named `fc1`.
|
||||
```python
|
||||
# fallback by op_name
|
||||
quant_config.set_local("fc1", StaticQuantConfig(w_dtype="fp32", act_dtype="fp32"))
|
||||
prepared_model = prepare(fp32_model, quant_config=quant_config, example_inputs=example_inputs)
|
||||
run_fn(prepared_model)
|
||||
q_model = convert(prepared_model)
|
||||
```
|
||||
2. Example of `op_type_dict`
|
||||
Here we don't quantize `Linear` layers.
|
||||
```python
|
||||
# fallback by op_type
|
||||
quant_config.set_local("Linear", StaticQuantConfig(w_dtype="fp32", act_dtype="fp32"))
|
||||
prepared_model = prepare(model, quant_config=quant_config, example_inputs=example_inputs)
|
||||
run_fn(prepared_model)
|
||||
q_model = convert(prepared_model)
|
||||
```
|
||||
|
||||
#### Model Examples
|
||||
|
||||
Users could refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex) on how to quantize a new model.
|
||||
|
||||
|
||||
### Static Quantization with PT2E Backend
|
||||
Compared to the IPEX backend, which uses JIT compilation to capture the eager model, the PT2E path uses `torch.dynamo` to capture the eager model into an FX graph model and then inserts observers and Q/DQ pairs on it. Finally, it uses `torch.compile` to perform pattern matching and replace the Q/DQ pairs with optimized quantized operators.
|
||||
|
||||
#### Usage Sample with PT2E
|
||||
There are four steps to perform W8A8 static quantization with the PT2E backend: `export`, `prepare`, `convert` and `compile`.
|
||||
|
||||
```python
|
||||
import torch
|
||||
from neural_compressor.torch.export import export
|
||||
from neural_compressor.torch.quantization import StaticQuantConfig, prepare, convert
|
||||
|
||||
# Prepare the float model and example inputs for export model
|
||||
model = UserFloatModel()
|
||||
example_inputs = ...
|
||||
|
||||
# Export eager model into FX graph model
|
||||
exported_model = export(model=model, example_inputs=example_inputs)
|
||||
# Quantize the model
|
||||
quant_config = StaticQuantConfig()
|
||||
prepared_model = prepare(exported_model, quant_config=quant_config)
|
||||
# Calibrate
|
||||
run_fn(prepared_model)
|
||||
q_model = convert(prepared_model)
|
||||
# Compile the quantized model and replace the Q/DQ pattern with Q-operator
|
||||
from torch._inductor import config
|
||||
|
||||
config.freezing = True
|
||||
opt_model = torch.compile(q_model)
|
||||
```
|
||||
|
||||
> Note: The `set_local` of `StaticQuantConfig` will be supported after the torch 2.4 release.
|
||||
|
||||
#### Model Examples with PT2E
|
||||
|
||||
Users could refer to [cv examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/cv/static_quant) and [llm examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e) on how to quantize a new model.
|
@@ -0,0 +1,329 @@
|
||||
|
||||
PyTorch Weight Only Quantization
|
||||
===============
|
||||
|
||||
- [Introduction](#introduction)
|
||||
- [Supported Matrix](#supported-matrix)
|
||||
- [Usage](#usage)
|
||||
- [Get Started](#get-started)
|
||||
- [Common arguments](#common-arguments)
|
||||
- [RTN](#rtn)
|
||||
- [GPTQ](#gptq)
|
||||
- [AutoRound](#autoround)
|
||||
- [AWQ](#awq)
|
||||
- [TEQ](#teq)
|
||||
- [HQQ](#hqq)
|
||||
- [Specify Quantization Rules](#specify-quantization-rules)
|
||||
- [Saving and Loading](#saving-and-loading)
|
||||
- [Layer Wise Quantization](#layer-wise-quantization)
|
||||
- [Efficient Usage on Client-Side](#efficient-usage-on-client-side)
|
||||
- [Examples](#examples)
|
||||
|
||||
## Introduction
|
||||
|
||||
As large language models (LLMs) become more prevalent, there is a growing need for new and improved quantization methods that can meet the computational demands of these modern architectures while maintaining accuracy. Compared to normal quantization like W8A8, weight-only quantization is probably a better trade-off between performance and accuracy, since, as we will see below, the bottleneck of deploying LLMs is the memory bandwidth, and weight-only quantization normally leads to better accuracy.
|
||||
|
||||
Model inference: Roughly speaking, two key steps are required to get the model's result. The first one is moving the model from memory to cache piece by piece, in which memory bandwidth $B$ and parameter count $P$ are the key factors; theoretically the time cost is $P \times 4 / B$ (for FP32 weights). The second one is computation, in which the device's computation capacity $C$ measured in FLOPS and the forward FLOPs $F$ play the key roles; theoretically the cost is $F / C$.
|
||||
|
||||
Text generation: The most famous application of LLMs is text generation, which predicts the next token/word based on the inputs/context. To generate a sequence of texts, we need to predict them one by one. In this scenario, $F \approx P$ if some operations like bmm are ignored and past key values have been saved. However, the $C/B$ ratio of modern devices can reach **100X**, which makes memory bandwidth the bottleneck in this scenario.
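
A rough back-of-the-envelope calculation makes the bottleneck concrete; all of the numbers below (7B parameters, 100 GB/s bandwidth, 100 TFLOPS) are assumed purely for illustration.

```python
P = 7e9      # parameter count (assumed 7B model)
B = 100e9    # memory bandwidth in bytes/s (assumed ~100 GB/s)
C = 100e12   # compute capacity in FLOPS (assumed ~100 TFLOPS)
F = P        # forward FLOPs per generated token, approximately P

t_memory = P * 4 / B   # time to stream FP32 weights: ~0.28 s per token
t_compute = F / C      # time to compute one token:   ~0.00007 s per token
# Memory traffic dominates, which is why shrinking the weight bytes (weight-only quantization) pays off.
```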
|
||||
|
||||
Besides, as mentioned in many papers [1][2], activation quantization is the main cause of the accuracy drop. So for the text generation task, weight-only quantization is the preferred option in most cases.
|
||||
|
||||
Theoretically, round-to-nearest (RTN) is the most straightforward way to quantize weights using scale maps. However, when the number of bits is small (e.g., 3), the MSE loss is larger than expected. A group size is introduced to reduce the number of elements that share the same scale and thereby improve accuracy.
|
||||
|
||||
## Supported Matrix
|
||||
|
||||
| Algorithms/Backend | PyTorch eager mode |
|
||||
|--------------|----------|
|
||||
| RTN | ✔ |
|
||||
| GPTQ | ✔ |
|
||||
| AutoRound| ✔ |
|
||||
| AWQ | ✔ |
|
||||
| TEQ | ✔ |
|
||||
| HQQ | ✔ |
|
||||
> **RTN:** A quantization method that we can think of very intuitively. It does not require additional datasets and is a very fast quantization method. Generally speaking, RTN converts the weight into a uniformly distributed integer data type, but some algorithms, such as QLoRA, propose a non-uniform NF4 data type and prove its theoretical optimality.
|
||||
|
||||
> **GPTQ:** A new one-shot weight quantization method based on approximate second-order information, that is both highly-accurate and highly efficient[4]. The weights of each column are updated based on the fixed-scale pseudo-quantization error and the inverse of the Hessian matrix calculated from the activations. The updated columns sharing the same scale may generate a new max/min value, so the scale needs to be saved for restoration.
|
||||
|
||||
> **AutoRound:** AutoRound is an advanced weight-only quantization algorithm for low-bits LLM inference. It's tailored for a wide range of models and consistently delivers noticeable improvements, often significantly outperforming SignRound[5] with the cost of more tuning time for quantization.
|
||||
|
||||
> **AWQ:** Proved that protecting only 1% of salient weights can greatly reduce quantization error. The salient weight channels are selected by observing the distribution of activations and weights per channel. The salient weights are also quantized after being multiplied by a large scale factor before quantization in order to preserve them.
|
||||
|
||||
> **TEQ:** A trainable equivalent transformation that preserves the FP32 precision in weight-only quantization. It is inspired by AWQ while providing a new solution to search for the optimal per-channel scaling factor between activations and weights.
|
||||
|
||||
> **HQQ:** The HQQ[6] method focuses specifically on minimizing errors in the weights rather than the layer activation. Additionally, by incorporating a sparsity-promoting loss, such as the $l_{p<1}$-norm, we effectively model outliers through a hyper-Laplacian distribution. This distribution more accurately captures the heavy-tailed nature of outlier errors compared to the squared error, resulting in a more nuanced representation of error distribution.
|
||||
|
||||
## Usage
|
||||
|
||||
### Get Started
|
||||
|
||||
WeightOnlyQuant for PyTorch uses the prepare and convert [APIs](./PyTorch.md#quantization-apis).
|
||||
|
||||
#### Common arguments
|
||||
|
||||
| Config | Capability |
|
||||
|---|---|
|
||||
| dtype (str)| ['int', 'nf4', 'fp4'] |
|
||||
| bits (int)| [1, ..., 8] |
|
||||
| group_size (int)| [-1, 1, ..., $C_{in}$] |
|
||||
| use_sym (bool)| [True, False] |
|
||||
| quant_lm_head (bool)| [False, True] |
|
||||
| use_double_quant (bool) | [True, False] |
|
||||
| double_quant_dtype (str) | ['int'] |
|
||||
| double_quant_bits (int) | [1, ..., bits] |
|
||||
| double_quant_use_sym (bool) | [True, False] |
|
||||
| double_quant_group_size (int) | [-1, 1, ..., $C_{in}$] |
|
||||
|
||||
Notes:
|
||||
|
||||
- *group_size = -1* refers to **per output channel quantization**. Taking a linear layer (input channel = $C_{in}$, output channel = $C_{out}$) as an instance, when *group_size = -1*, quantization calculates $C_{out}$ quantization parameters in total. Otherwise, when *group_size = gs*, quantization parameters are calculated for every $gs$ elements along the input channel, leading to $C_{out} \times (C_{in} / gs)$ quantization parameters in total (a combined configuration example follows these notes).
|
||||
- 4-bit NormalFloat(NF4) is proposed in QLoRA[7]. 'fp4' includes [fp4_e2m1](../../neural_compressor/adaptor/torch_utils/weight_only.py#L37) and [fp4_e2m1_bnb](https://github.com/TimDettmers/bitsandbytes/blob/18e827d666fa2b70a12d539ccedc17aa51b2c97c/bitsandbytes/functional.py#L735). By default, fp4 refers to fp4_e2m1_bnb.
|
||||
- *quant_lm_head* defaults to False. This means that, except for transformer blocks, the last layer in transformer models will not be quantized by default. The last layer may be named "lm_head", "output_layer" or "embed_out".
|
||||
- Only RTN and GPTQ support double quant.
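
As referenced above, a config can combine these common arguments directly; the values below are arbitrary illustrations, not recommended defaults.

```python
from neural_compressor.torch.quantization import RTNConfig

# Arbitrary illustration of the common arguments listed in the table above.
quant_config = RTNConfig(
    dtype="int",     # integer weight data type
    bits=4,          # 4-bit weights
    group_size=128,  # 128 input-channel elements share one scale
    use_sym=True,    # symmetric quantization
)
```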
|
||||
|
||||
#### RTN
|
||||
|
||||
| rtn_args | comments | default value |
|
||||
|----------|-------------|-------------------------------------------------------------------|
|
||||
| group_dim (int) | Dimension for grouping | 1 |
|
||||
| use_full_range (bool) | Enables full range for activations | False |
|
||||
| use_mse_search (bool) | Enables mean squared error (MSE) search | False |
|
||||
| use_layer_wise (bool) | Enables quantize model per layer | False |
|
||||
| model_path (str) | Model path that is used to load state_dict per layer | |
|
||||
|
||||
> **Notes:** `model_path` is only used when `use_layer_wise=True`. `layer-wise` support is still evolving; stay tuned.
|
||||
|
||||
``` python
|
||||
# Quantization code
|
||||
from neural_compressor.torch.quantization import prepare, convert, RTNConfig
|
||||
|
||||
quant_config = RTNConfig()
|
||||
model = prepare(model, quant_config)
|
||||
model = convert(model)
|
||||
```
|
||||
|
||||
#### GPTQ
|
||||
|
||||
| gptq_args | comments | default value |
|
||||
|----------|-------------|-------------------------------------------------------------------|
|
||||
| use_mse_search (bool) | Enables mean squared error (MSE) search | False |
|
||||
| use_layer_wise (bool) | Enables quantize model per layer | False |
|
||||
| model_path (str) | Model path that is used to load state_dict per layer | |
|
||||
| use_double_quant (bool) | Enables double quantization | False |
|
||||
| act_order (bool) | Whether to sort Hessian's diagonal values to rearrange channel-wise quantization order | False |
|
||||
| percdamp (float) | Percentage of Hessian's diagonal values' average, which will be added to Hessian's diagonal to increase numerical stability | 0.01 |
|
||||
| block_size (int) | Execute GPTQ quantization per block, block shape = [C_out, block_size] | 128 |
|
||||
| static_groups (bool) | Whether to calculate group-wise quantization parameters in advance. This option mitigates act_order's extra computational requirements. | False |
|
||||
| true_sequential (bool) | Whether to quantize layers within a transformer block in their original order. This can lead to higher accuracy but a slower overall quantization process. | False |
|
||||
> **Note:** `model_path` is only used when `use_layer_wise=True`. `layer-wise` support is still evolving; stay tuned.
|
||||
|
||||
``` python
|
||||
# Quantization code
|
||||
from neural_compressor.torch.quantization import prepare, convert, GPTQConfig
|
||||
|
||||
quant_config = GPTQConfig()
|
||||
model = prepare(model, quant_config)
|
||||
run_fn(model) # calibration
|
||||
model = convert(model)
|
||||
```
|
||||
|
||||
#### AutoRound
|
||||
|
||||
| autoround_args | comments | default value |
|
||||
|----------|-------------|-------------------------------------------------------------------|
|
||||
| enable_full_range (bool) | Whether to enable full range quantization | False |
|
||||
| batch_size (int) | Batch size for training | 8 |
|
||||
| lr_scheduler | The learning rate scheduler to be used | None |
|
||||
| enable_quanted_input (bool) | Whether to use quantized input data | True |
|
||||
| enable_minmax_tuning (bool) | Whether to enable min-max tuning | True |
|
||||
| lr (float) | The learning rate | 0 |
|
||||
| minmax_lr (float) | The learning rate for min-max tuning | None |
|
||||
| low_gpu_mem_usage (bool) | Whether to use low GPU memory | True |
|
||||
| iters (int) | Number of iterations | 200 |
|
||||
| seqlen (int) | Length of the sequence | 2048 |
|
||||
| n_samples (int) | Number of samples | 512 |
|
||||
| sampler (str) | The sampling method | "rand" |
|
||||
| seed (int) | The random seed | 42 |
|
||||
| n_blocks (int) | Number of blocks | 1 |
|
||||
| gradient_accumulate_steps (int) | Number of gradient accumulation steps | 1 |
|
||||
| not_use_best_mse (bool) | Whether to use mean squared error | False |
|
||||
| dynamic_max_gap (int) | The dynamic maximum gap | -1 |
|
||||
| scale_dtype (str) | The data type of quantization scale to be used, different kernels have different choices | "float16" |
|
||||
|
||||
``` python
|
||||
# Quantization code
|
||||
from neural_compressor.torch.quantization import prepare, convert, AutoRoundConfig
|
||||
|
||||
quant_config = AutoRoundConfig()
|
||||
model = prepare(model, quant_config)
|
||||
run_fn(model) # calibration
|
||||
model = convert(model)
|
||||
```
|
||||
|
||||
#### AWQ
|
||||
|
||||
| awq_args | comments | default value |
|
||||
|----------|-------------|-------------------------------------------------------------------|
|
||||
| group_dim (int) | Dimension for grouping | 1 |
|
||||
| use_full_range (bool) | Enables full range for activations | False |
|
||||
| use_mse_search (bool) | Enables mean squared error (MSE) search | False |
|
||||
| use_layer_wise (bool) | Enables quantize model per layer | False |
|
||||
| use_auto_scale (bool) | Enables best scales search based on activation distribution | True |
|
||||
| use_auto_clip (bool) | Enables clip range search | True |
|
||||
| folding (bool) | Allows inserting a mul before the linear op when the scale cannot be absorbed by the last layer | False |
|
||||
> **Notes:** `layer-wise` support is still evolving; stay tuned.
|
||||
|
||||
``` python
|
||||
# Quantization code
|
||||
from neural_compressor.torch.quantization import prepare, convert, AWQConfig
|
||||
|
||||
quant_config = AWQConfig()
|
||||
model = prepare(model, quant_config, example_inputs=example_inputs)
|
||||
run_fn(model) # calibration
|
||||
model = convert(model)
|
||||
```
|
||||
|
||||
#### TEQ
|
||||
|
||||
| teq_args | comments | default value |
|
||||
|----------|-------------|-------------------------------------------------------------------|
|
||||
| group_dim (int) | Dimension for grouping | 1 |
|
||||
| use_full_range (bool) | Enables full range for activations | False |
|
||||
| use_mse_search (bool) | Enables mean squared error (MSE) search | False |
|
||||
| use_layer_wise (bool) | Enables quantize model per layer | False |
|
||||
| use_double_quant (bool) | Enables double quantization | False |
|
||||
| folding (bool) | Allows inserting a mul before the linear op when the scale cannot be absorbed by the last layer | False |
|
||||
> **Notes:** `layer-wise` support is still evolving; stay tuned.
|
||||
|
||||
``` python
|
||||
# Quantization code
|
||||
from neural_compressor.torch.quantization import prepare, convert, TEQConfig
|
||||
|
||||
quant_config = TEQConfig()
|
||||
model = prepare(model, quant_config, example_inputs=example_inputs)
|
||||
train_fn(model) # calibration
|
||||
model = convert(model)
|
||||
```
|
||||
|
||||
#### HQQ
|
||||
|
||||
| hqq_args | comments | default value |
|
||||
|----------|-------------|-------------------------------------------------------------------|
|
||||
| quant_zero (bool) | Whether to quantize zero point | True |
|
||||
| quant_scale (bool) | Whether to quantize the scale | False |
|
||||
| scale_quant_group_size (int) | The group size for quantizing scale | 128 |
|
||||
|
||||
``` python
|
||||
# Quantization code
|
||||
from neural_compressor.torch.quantization import prepare, convert, HQQConfig
|
||||
|
||||
quant_config = HQQConfig()
|
||||
model = prepare(model, quant_config)
|
||||
run_fn(model) # calibration
|
||||
model = convert(model)
|
||||
```
|
||||
|
||||
### Specify Quantization Rules
|
||||
|
||||
Intel(R) Neural Compressor supports specifying quantization rules by operator name or operator type. Users can set `local` in a dict or use the `set_local` method of the config class to achieve this.
|
||||
|
||||
1. Example of setting `local` from a dict
|
||||
|
||||
```python
|
||||
quant_config = {
|
||||
"rtn": {
|
||||
"global": {
|
||||
"dtype": "int",
|
||||
"bits": 4,
|
||||
"group_size": -1,
|
||||
"use_sym": True,
|
||||
},
|
||||
"local": {
|
||||
"lm_head": {
|
||||
"dtype": "fp32",
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. Example of using `set_local`
|
||||
|
||||
```python
|
||||
quant_config = RTNConfig()
|
||||
lm_head_config = RTNConfig(dtype="fp32")
|
||||
quant_config.set_local("lm_head", lm_head_config)
|
||||
```
|
||||
|
||||
### Saving and Loading
|
||||
|
||||
The saved_results folder contains two files: quantized_model.pt and qconfig.json, and the generated model is a quantized model. The quantized model will include `WeightOnlyLinear`. To support low-memory inference, Intel(R) Neural Compressor implemented `WeightOnlyLinear`, a torch.nn.Module, to compress the fake-quantized fp32 model. Since torch does not provide flexible data type storage, `WeightOnlyLinear` packs the low-bit data into a longer data type, such as torch.int8 and torch.int32. The low-bit data includes weights and zero points. When using `WeightOnlyLinear` for inference, it restores the compressed data to float32 and runs the torch linear function.
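
As a toy illustration of the packing idea described above, two 4-bit values can be stored in one 8-bit container; Neural Compressor's `WeightOnlyLinear` uses the same principle with larger containers such as torch.int32, and its exact layout differs from this sketch.

```python
import torch

# Toy sketch of packing two 4-bit quantized values into one 8-bit container.
low, high = 0b0011, 0b1010
packed = torch.tensor([(high << 4) | low], dtype=torch.uint8)

# Unpacking restores the original 4-bit values before dequantization back to float32.
unpacked_low = int(packed[0]) & 0x0F
unpacked_high = (int(packed[0]) >> 4) & 0x0F
assert (unpacked_low, unpacked_high) == (low, high)
```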
|
||||
|
||||
```python
|
||||
# Quantization code
|
||||
from neural_compressor.torch.quantization import prepare, convert, RTNConfig
|
||||
|
||||
quant_config = RTNConfig()
|
||||
model = prepare(model, quant_config)
|
||||
model = convert(model)
|
||||
|
||||
# save
|
||||
model.save("saved_results")
|
||||
|
||||
# load
|
||||
from neural_compressor.torch.quantization import load
|
||||
|
||||
orig_model = YOURMODEL()
|
||||
loaded_model = load(
|
||||
"saved_results", original_model=orig_model
|
||||
) # Please note that the original_model parameter passes the original model.
|
||||
```
|
||||
|
||||
## Layer Wise Quantization
|
||||
|
||||
As the size of LLMs continues to grow, loading the entire model into a single GPU card or the RAM of a client machine becomes impractical. To address this challenge, we introduce Layer-wise Quantization (LWQ), a method that quantizes LLMs layer by layer or block by block. This approach significantly reduces memory consumption. The diagram below illustrates the LWQ process.
|
||||
|
||||
<img src="./imgs/lwq.png" width=780 height=429>
|
||||
|
||||
*Figure 1: The process of layer-wise quantization for a PyTorch model. Grey represents empty parameters and blue represents parameters that need to be quantized. Every rectangle inside the model represents one layer.*
|
||||
|
||||
|
||||
Currently, we support LWQ for `RTN`, `AutoRound`, and `GPTQ`.
|
||||
|
||||
Here, we take the `RTN` algorithm as an example to demonstrate the usage of LWQ.
|
||||
|
||||
```python
|
||||
from neural_compressor.torch.quantization import RTNConfig, convert, prepare
|
||||
from neural_compressor.torch import load_empty_model
|
||||
|
||||
model_state_dict_path = "/path/to/model/state/dict"
|
||||
float_model = load_empty_model(model_state_dict_path)
|
||||
quant_config = RTNConfig(use_layer_wise=True)
|
||||
prepared_model = prepare(float_model, quant_config)
|
||||
quantized_model = convert(prepared_model)
|
||||
```
|
||||
|
||||
## Efficient Usage on Client-Side
|
||||
|
||||
For client machines with limited RAM and cores, we offer optimizations to reduce computational overhead and minimize memory usage. For detailed information, please refer to [Quantization on Client](https://github.com/intel/neural-compressor/blob/master/docs/source/3x/client_quant.md).
|
||||
|
||||
|
||||
## Examples
|
||||
|
||||
Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only) on how to quantize a model with WeightOnlyQuant.
|
||||
|
||||
## Reference
|
||||
|
||||
[1]. Xiao, Guangxuan, et al. "Smoothquant: Accurate and efficient post-training quantization for large language models." arXiv preprint arXiv:2211.10438 (2022).
|
||||
|
||||
[2]. Wei, Xiuying, et al. "Outlier suppression: Pushing the limit of low-bit transformer language models." arXiv preprint arXiv:2209.13325 (2022).
|
||||
|
||||
[3]. Lin, Ji, et al. "AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration." arXiv preprint arXiv:2306.00978 (2023).
|
||||
|
||||
[4]. Frantar, Elias, et al. "Gptq: Accurate post-training quantization for generative pre-trained transformers." arXiv preprint arXiv:2210.17323 (2022).
|
||||
|
||||
[5]. Cheng, Wenhua, et al. "Optimize Weight Rounding via Signed Gradient Descent for the Quantization of LLMs" arXiv preprint arXiv:2309.05516 (2023).
|
||||
|
||||
[6]. Badri, Hicham and Shaji, Appu. "Half-Quadratic Quantization of Large Machine Learning Models." [Online] Available: <https://mobiusml.github.io/hqq_blog/> (2023).
|
||||
|
||||
[7]. Dettmers, Tim, et al. "Qlora: Efficient finetuning of quantized llms." arXiv preprint arXiv:2305.14314 (2023).
|
uukssw/quote1/_ref/neural-compressor/docs/source/3x/PyTorch.md
@@ -0,0 +1,278 @@
|
||||
Torch
|
||||
=================================================
|
||||
|
||||
1. [Introduction](#introduction)
|
||||
2. [Torch-like APIs](#torch-like-apis)
|
||||
3. [Supported Matrix](#supported-matrix)
|
||||
4. [Common Problems](#common-problems)
|
||||
|
||||
## Introduction
|
||||
|
||||
`neural_compressor.torch` provides a Torch-like API and integrates various model compression methods fine-grained to the torch.nn.Module level. It supports a comprehensive range of models, including but not limited to CV models, NLP models, and large language models. A variety of quantization methods are available, including classic INT8 quantization, SmoothQuant, and the popular weight-only quantization. Neural Compressor also provides the latest research in simulation work, such as FP8 emulation quantization and MX data type emulation quantization.
|
||||
|
||||
In terms of ease of use, Neural Compressor is committed to providing an easy-to-use interface and an easy-to-extend design: on the one hand, it reuses the PyTorch `prepare` and `convert` APIs; on the other hand, it exposes a `Quantizer` base class so that `prepare` and `convert` can be conveniently customized.
|
||||
|
||||
For more details, please refer to [link](https://github.com/intel/neural-compressor/discussions/1527) in Neural Compressor discussion space.
|
||||
|
||||
So far, `neural_compressor.torch` still relies on the backend to generate the quantized model and run it on the corresponding backend, but in the future, neural_compressor plans to provide a generalized, device-agnostic Q-DQ model to achieve one-time quantization and arbitrary deployment.
|
||||
|
||||
## Torch-like APIs
|
||||
|
||||
Currently, we provide the three user scenarios below, through the `prepare` & `convert`, `autotune` and `load` APIs.
|
||||
|
||||
- One-time quantization of the model
|
||||
- Get the best quantized model by setting the search scope and target
|
||||
- Direct deployment of the quantized model
|
||||
|
||||
### Quantization APIs
|
||||
|
||||
```python
|
||||
def prepare(
|
||||
model: torch.nn.Module,
|
||||
quant_config: BaseConfig,
|
||||
inplace: bool = True,
|
||||
example_inputs: Any = None,
|
||||
):
|
||||
"""Prepare the model for calibration.
|
||||
|
||||
Insert observers into the model so that it can monitor the input and output tensors during calibration.
|
||||
|
||||
Args:
|
||||
model (torch.nn.Module): origin model
|
||||
quant_config (BaseConfig): quantization config.
|
||||
inplace (bool, optional): It will change the given model in-place if True.
|
||||
example_inputs (tensor/tuple/dict, optional): used to trace torch model.
|
||||
|
||||
Returns:
|
||||
prepared and calibrated module.
|
||||
"""
|
||||
```
|
||||
|
||||
```python
|
||||
def convert(
|
||||
model: torch.nn.Module,
|
||||
quant_config: BaseConfig = None,
|
||||
inplace: bool = True,
|
||||
):
|
||||
"""Convert the prepared model to a quantized model.
|
||||
|
||||
Args:
|
||||
model (torch.nn.Module): the prepared model
|
||||
quant_config (BaseConfig, optional): quantization config, for special usage.
|
||||
inplace (bool, optional): It will change the given model in-place if True.
|
||||
|
||||
Returns:
|
||||
The quantized model.
|
||||
"""
|
||||
```
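
A minimal sketch combining the two APIs above, mirroring the weight-only RTN flow documented elsewhere in these pages; the single-Linear model is a placeholder chosen only for illustration.

```python
import torch
from neural_compressor.torch.quantization import RTNConfig, prepare, convert

user_model = torch.nn.Sequential(torch.nn.Linear(64, 64))  # placeholder module for illustration

quant_config = RTNConfig()
user_model = prepare(model=user_model, quant_config=quant_config)
user_model = convert(model=user_model)
```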
|
||||
|
||||
### Autotune API
|
||||
|
||||
```python
|
||||
def autotune(
|
||||
model: torch.nn.Module,
|
||||
tune_config: TuningConfig,
|
||||
eval_fn: Callable,
|
||||
eval_args=None,
|
||||
run_fn=None,
|
||||
run_args=None,
|
||||
example_inputs=None,
|
||||
):
|
||||
"""The main entry of auto-tune.
|
||||
|
||||
Args:
|
||||
model (torch.nn.Module): the model to be tuned.
|
||||
tune_config (TuningConfig): the tuning configuration, which defines the search space and target.
|
||||
eval_fn (Callable): for evaluation of quantized models.
|
||||
eval_args (tuple, optional): arguments used by eval_fn. Defaults to None.
|
||||
run_fn (Callable, optional): for calibration to quantize model. Defaults to None.
|
||||
run_args (tuple, optional): arguments used by run_fn. Defaults to None.
|
||||
example_inputs (tensor/tuple/dict, optional): used to trace torch model. Defaults to None.
|
||||
|
||||
Returns:
|
||||
The quantized model.
|
||||
"""
|
||||
```
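
A minimal sketch of the auto-tune entry above, following the mixed-precision example elsewhere in these docs; the placeholder model and the trivial `eval_acc_fn` are assumptions made only for illustration.

```python
import torch
from neural_compressor.torch.quantization import RTNConfig, TuningConfig, autotune

user_model = torch.nn.Sequential(torch.nn.Linear(64, 64))  # placeholder module for illustration


def eval_acc_fn(model):
    ...  # placeholder: evaluate the candidate model and return an accuracy metric
    return 1.0


# Try 4-bit then 8-bit RTN and keep the first candidate that meets the tuning target.
custom_tune_config = TuningConfig(config_set=[RTNConfig(bits=4), RTNConfig(bits=8)], max_trials=2)
best_model = autotune(model=user_model, tune_config=custom_tune_config, eval_fn=eval_acc_fn)
```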
|
||||
|
||||
### Load API
|
||||
|
||||
`neural_compressor.torch` links the save function to the quantized model. If `model.save` already exists, Neural Compressor renames the previous function to `model.orig_save`.
|
||||
|
||||
```python
|
||||
def save(self, output_dir="./saved_results"):
|
||||
"""
|
||||
Args:
|
||||
self (torch.nn.Module): the quantized model.
|
||||
output_dir (str, optional): path to save the quantized model
|
||||
"""
|
||||
```
|
||||
|
||||
```python
|
||||
def load(output_dir="./saved_results", model=None):
|
||||
"""The main entry of load for all algorithms.
|
||||
|
||||
Args:
|
||||
output_dir (str, optional): path to quantized model folder. Defaults to "./saved_results".
|
||||
model (torch.nn.Module, optional): original model; loading an empty model is suggested.
|
||||
|
||||
Returns:
|
||||
The quantized model
|
||||
"""
|
||||
```
|
||||
|
||||
## Supported Matrix
|
||||
|
||||
<table class="tg"><thead>
|
||||
<tr>
|
||||
<th class="tg-9wq8">Method<br></th>
|
||||
<th class="tg-9wq8">Algorithm</th>
|
||||
<th class="tg-9wq8">Backend</th>
|
||||
<th class="tg-9wq8">Support Status</th>
|
||||
<th class="tg-9wq8">Usage Link</th>
|
||||
</tr></thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td class="tg-9wq8" rowspan="6">Weight Only Quantization<br></td>
|
||||
<td class="tg-9wq8">Round to Nearest (RTN)<br></td>
|
||||
<td class="tg-9wq8">PyTorch eager mode</td>
|
||||
<td class="tg-9wq8">✔</td>
|
||||
<td class="tg-9wq8"><a href="PT_WeightOnlyQuant.md#rtn">link</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-9wq8"><a href=https://arxiv.org/abs/2210.17323>GPTQ</a><br></td>
|
||||
<td class="tg-9wq8">PyTorch eager mode</td>
|
||||
<td class="tg-9wq8">✔</td>
|
||||
<td class="tg-9wq8"><a href="PT_WeightOnlyQuant.md#gptq">link</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-9wq8"><a href=https://arxiv.org/abs/2306.00978>AWQ</a></td>
|
||||
<td class="tg-9wq8">PyTorch eager mode</td>
|
||||
<td class="tg-9wq8">✔</td>
|
||||
<td class="tg-9wq8"><a href="PT_WeightOnlyQuant.md#awq">link</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-9wq8"><a href=https://arxiv.org/abs/2309.05516>AutoRound</a></td>
|
||||
<td class="tg-9wq8">PyTorch eager mode</td>
|
||||
<td class="tg-9wq8">✔</td>
|
||||
<td class="tg-9wq8"><a href="PT_WeightOnlyQuant.md#autoround">link</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-9wq8"><a href=https://arxiv.org/abs/2310.10944>TEQ</a></td>
|
||||
<td class="tg-9wq8">PyTorch eager mode</td>
|
||||
<td class="tg-9wq8">✔</td>
|
||||
<td class="tg-9wq8"><a href="PT_WeightOnlyQuant.md#teq">link</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-9wq8"><a href=https://mobiusml.github.io/hqq_blog>HQQ</a></td>
|
||||
<td class="tg-9wq8">PyTorch eager mode</td>
|
||||
<td class="tg-9wq8">✔</td>
|
||||
<td class="tg-9wq8"><a href="PT_WeightOnlyQuant.md#hqq">link</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-9wq8">Smooth Quantization</td>
|
||||
<td class="tg-9wq8"><a href=https://proceedings.mlr.press/v202/xiao23c.html>SmoothQuant</a></td>
|
||||
<td class="tg-9wq8"><a href=https://pytorch.org/tutorials/recipes/recipes/intel_extension_for_pytorch.html>intel-extension-for-pytorch</a></td>
|
||||
<td class="tg-9wq8">✔</td>
|
||||
<td class="tg-9wq8"><a href="PT_SmoothQuant.md">link</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-9wq8" rowspan="3">Static Quantization</td>
|
||||
<td class="tg-9wq8" rowspan="3"><a href=https://pytorch.org/docs/master/quantization.html#post-training-static-quantization>Post-training Static Quantization</a></td>
|
||||
<td class="tg-9wq8">intel-extension-for-pytorch (INT8)</td>
|
||||
<td class="tg-9wq8">✔</td>
|
||||
<td class="tg-9wq8"><a href="PT_StaticQuant.md">link</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-9wq8"><a href=https://pytorch.org/docs/stable/torch.compiler_deepdive.html>TorchDynamo (INT8)</a></td>
|
||||
<td class="tg-9wq8">✔</td>
|
||||
<td class="tg-9wq8"><a href="PT_StaticQuant.md">link</a></td>
|
||||
</tr>
<tr>
|
||||
<td class="tg-9wq8"><a href=https://docs.habana.ai/en/latest/index.html>Intel Gaudi AI accelerator (FP8)</a></td>
|
||||
<td class="tg-9wq8">✔</td>
|
||||
<td class="tg-9wq8"><a href="PT_FP8Quant.md">link</a></td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td class="tg-9wq8">Dynamic Quantization</td>
|
||||
<td class="tg-9wq8"><a href=https://pytorch.org/docs/master/quantization.html#post-training-dynamic-quantization>Post-training Dynamic Quantization</a></td>
|
||||
<td class="tg-9wq8">TorchDynamo</td>
|
||||
<td class="tg-9wq8">✔</td>
|
||||
<td class="tg-9wq8"><a href="PT_DynamicQuant.md">link</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-9wq8">MX Quantization</td>
|
||||
<td class="tg-9wq8"><a href=https://arxiv.org/pdf/2310.10537>Microscaling Data Formats for
|
||||
Deep Learning</a></td>
|
||||
<td class="tg-9wq8">PyTorch eager mode</td>
|
||||
<td class="tg-9wq8">✔</td>
|
||||
<td class="tg-9wq8"><a href="PT_MXQuant.md">link</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-9wq8">Mixed Precision</td>
|
||||
<td class="tg-9wq8"><a href=https://arxiv.org/abs/1710.03740>Mixed precision</a></td>
|
||||
<td class="tg-9wq8">PyTorch eager mode</td>
|
||||
<td class="tg-9wq8">✔</td>
|
||||
<td class="tg-9wq8"><a href="PT_MixPrecision.md">link</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-9wq8">Quantization Aware Training</td>
|
||||
<td class="tg-9wq8"><a href=https://pytorch.org/docs/master/quantization.html#quantization-aware-training-for-static-quantization>Quantization Aware Training</a></td>
|
||||
<td class="tg-9wq8">TorchDynamo</td>
|
||||
<td class="tg-9wq8">stay tuned</td>
|
||||
<td class="tg-9wq8">stay tuned</td>
|
||||
</tr>
|
||||
</tbody></table>
|
||||
|
||||
## Common Problems
|
||||
|
||||
1. How to choose the backend between `intel-extension-for-pytorch` and `TorchDynamo`?
|
||||
> Neural Compressor provides automatic logic to detect which backend should be used.
|
||||
> <table class="tg"><thead>
|
||||
<tr>
|
||||
<th class="tg-9wq8">Environment</th>
|
||||
<th class="tg-9wq8">Automatic Backend</th>
|
||||
</tr></thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td class="tg-9wq8">import torch</td>
|
||||
<td class="tg-9wq8">torch.dynamo</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="tg-9wq8">import torch<br>import intel-extension-for-pytorch</td>
|
||||
<td class="tg-9wq8">intel-extension-for-pytorch</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
2. How to set different configuration for specific op_name or op_type?
|
||||
> Neural Compressor extends a `set_local` method based on the global configuration object to set custom configuration.
|
||||
|
||||
```python
|
||||
def set_local(self, operator_name_or_list: Union[List, str, Callable], config: BaseConfig) -> BaseConfig:
|
||||
"""Set custom configuration based on the global configuration object.
|
||||
|
||||
Args:
|
||||
operator_name_or_list (Union[List, str, Callable]): specific operator
|
||||
config (BaseConfig): specific configuration
|
||||
"""
|
||||
```
|
||||
|
||||
> Demo:
|
||||
|
||||
```python
|
||||
quant_config = RTNConfig() # Initialize global configuration with default bits=4
|
||||
quant_config.set_local(".*mlp.*", RTNConfig(bits=8)) # For layers with "mlp" in their names, set bits=8
|
||||
quant_config.set_local("Conv1d", RTNConfig(dtype="fp32")) # For Conv1d layers, do not quantize them.
|
||||
```
|
||||
|
||||
3. How to specify an accelerator?
|
||||
|
||||
> Neural Compressor provides automatic accelerator detection, including HPU, XPU, CUDA, and CPU.
|
||||
|
||||
> The automatically detected accelerator may not be suitable for some special cases, such as poor performance, memory limitations. In such situations, users can override the detected accelerator by setting the environment variable `INC_TARGET_DEVICE`.
|
||||
|
||||
> Usage:
|
||||
|
||||
```bash
|
||||
export INC_TARGET_DEVICE=cpu
|
||||
```
|
uukssw/quote1/_ref/neural-compressor/docs/source/3x/TF_Quant.md
@@ -0,0 +1,123 @@
|
||||
|
||||
TensorFlow Quantization
|
||||
===============
|
||||
|
||||
1. [Introduction](#introduction)
|
||||
2. [Get Started](#get-started)
|
||||
2.1 [Without Accuracy Aware Tuning](#without-accuracy-aware-tuning)
|
||||
2.2 [With Accuracy Aware Tuning](#with-accuracy-aware-tuning)
|
||||
2.3 [Specify Quantization Rules](#specify-quantization-rules)
|
||||
3. [Examples](#examples)
|
||||
|
||||
## Introduction
|
||||
|
||||
`neural_compressor.tensorflow` supports quantizing both TensorFlow and Keras models, with or without accuracy-aware tuning.
|
||||
|
||||
For the detailed quantization fundamentals, please refer to the document for [Quantization](quantization.md).
|
||||
|
||||
|
||||
## Get Started
|
||||
|
||||
|
||||
### Without Accuracy Aware Tuning
|
||||
|
||||
|
||||
This means the user can leverage Intel(R) Neural Compressor to directly generate a fully quantized model without accuracy-aware tuning. It is the user's responsibility to ensure that the accuracy of the quantized model meets expectations.
|
||||
|
||||
``` python
|
||||
# main.py
|
||||
|
||||
# Original code
|
||||
model = tf.keras.applications.resnet50.ResNet50(weights="imagenet")
|
||||
val_dataset = ...
|
||||
val_dataloader = MyDataloader(dataset=val_dataset)
|
||||
|
||||
# Quantization code
|
||||
from neural_compressor.tensorflow import quantize_model, StaticQuantConfig
|
||||
|
||||
quant_config = StaticQuantConfig()
|
||||
qmodel = quantize_model(
|
||||
model=model,
|
||||
quant_config=quant_config,
|
||||
calib_dataloader=val_dataloader,
|
||||
)
|
||||
qmodel.save("./output")
|
||||
```
|
||||
|
||||
### With Accuracy Aware Tuning
|
||||
|
||||
This means the user can leverage the advanced features of Intel(R) Neural Compressor to tune out the best quantized model, one that has the best accuracy and good performance. The user should provide `eval_fn` and `eval_args`.
|
||||
|
||||
``` python
|
||||
# main.py
|
||||
|
||||
# Original code
|
||||
model = tf.keras.applications.resnet50.ResNet50(weights="imagenet")
|
||||
val_dataset = ...
|
||||
val_dataloader = MyDataloader(dataset=val_dataset)
|
||||
|
||||
|
||||
def eval_acc_fn(model) -> float:
|
||||
...
|
||||
return acc
|
||||
|
||||
|
||||
# Quantization code
|
||||
from neural_compressor.common.base_tuning import TuningConfig
|
||||
from neural_compressor.tensorflow import autotune
|
||||
|
||||
# it's also supported to define custom_tune_config as:
|
||||
# TuningConfig(StaticQuantConfig(weight_sym=[True, False], act_sym=[True, False]))
|
||||
custom_tune_config = TuningConfig(
|
||||
config_set=[
|
||||
StaticQuantConfig(weight_sym=True, act_sym=True),
|
||||
StaticQuantConfig(weight_sym=False, act_sym=False),
|
||||
]
|
||||
)
|
||||
best_model = autotune(
|
||||
model=model,
|
||||
tune_config=custom_tune_config,
|
||||
eval_fn=eval_acc_fn,
|
||||
calib_dataloader=val_dataloader,
|
||||
)
|
||||
best_model.save("./output")
|
||||
```
|
||||
|
||||
### Specify Quantization Rules
|
||||
Intel(R) Neural Compressor supports specifying quantization rules by operator name or operator type. Users can set `local` in a dict or use the `set_local` method of the config class to achieve this.
|
||||
|
||||
1. Example of setting `local` from a dict
|
||||
```python
|
||||
quant_config = {
|
||||
"static_quant": {
|
||||
"global": {
|
||||
"weight_dtype": "int8",
|
||||
"weight_sym": True,
|
||||
"weight_granularity": "per_tensor",
|
||||
"act_dtype": "int8",
|
||||
"act_sym": True,
|
||||
"act_granularity": "per_tensor",
|
||||
},
|
||||
"local": {
|
||||
"conv1": {
|
||||
"weight_dtype": "fp32",
|
||||
"act_dtype": "fp32",
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
config = StaticQuantConfig.from_dict(quant_config)
|
||||
```
|
||||
2. Example of using `set_local`
|
||||
```python
|
||||
from neural_compressor.tensorflow import StaticQuantConfig

quant_config = StaticQuantConfig()
|
||||
conv2d_config = StaticQuantConfig(
|
||||
weight_dtype="fp32",
|
||||
act_dtype="fp32",
|
||||
)
|
||||
quant_config.set_local("conv1", conv2d_config)
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/tensorflow) on how to quantize a TensorFlow model with `neural_compressor.tensorflow`.
|
53
uukssw/quote1/_ref/neural-compressor/docs/source/3x/TF_SQ.md
Normal file
@@ -0,0 +1,53 @@
|
||||
# Smooth Quant
|
||||
|
||||
- [Smooth Quant](#smooth-quant)
|
||||
- [Introduction](#introduction)
|
||||
- [Usage](#usage)
|
||||
- [Using a Fixed `alpha`](#using-a-fixed-alpha)
|
||||
- [Determining the `alpha` through auto-tuning](#determining-the-alpha-through-auto-tuning)
|
||||
- [Examples](#examples)
|
||||
|
||||
|
||||
## Introduction
|
||||
|
||||
Quantization is a common compression operation that reduces memory and accelerates inference by converting the floating-point matrix to an integer matrix. For large language models (LLMs) with gigantic parameters, systematic outliers make quantization of activations difficult. [SmoothQuant](https://arxiv.org/abs/2211.10438), a training-free post-training quantization (PTQ) solution, migrates this difficulty from activations to weights offline with a mathematically equivalent transformation.
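As a rough illustration of that transformation (a plain-NumPy sketch, not the INC implementation; the layer shapes and `alpha=0.5` are assumptions for the example), a per-input-channel scale `s` moves quantization difficulty from activations to weights while keeping the layer output unchanged:

```python
import numpy as np


def smooth(X: np.ndarray, W: np.ndarray, alpha: float = 0.5):
    """Smooth a linear layer Y = X @ W, with X of shape (N, C_in) and W of shape (C_in, C_out)."""
    act_max = np.abs(X).max(axis=0)  # per-input-channel activation range
    w_max = np.abs(W).max(axis=1)  # per-input-channel weight range
    s = np.maximum(act_max, 1e-8) ** alpha / np.maximum(w_max, 1e-8) ** (1 - alpha)
    # (X / s) @ (s[:, None] * W) == X @ W, so the model output is unchanged,
    # but the scaled activations are easier to quantize.
    return X / s, s[:, None] * W
```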
|
||||
|
||||
Please refer to the document of [Smooth Quant](../quantization.md/#smooth-quant) for detailed fundamental knowledge.
|
||||
|
||||
|
||||
## Usage
|
||||
There are two ways to apply smooth quantization: 1) using a fixed `alpha` for the entire model or 2) determining the `alpha` through auto-tuning.
|
||||
|
||||
### Using a Fixed `alpha`
|
||||
To set a fixed alpha for the entire model, users can follow this example:
|
||||
|
||||
```python
|
||||
from neural_compressor.tensorflow import SmoothQuantConfig, StaticQuantConfig, quantize_model
|
||||
|
||||
sq_config, static_config = SmoothQuantConfig(alpha=0.5), StaticQuantConfig()
|
||||
q_model = quantize_model(output_graph_def, [sq_config, static_config], calib_dataloader)
|
||||
```
|
||||
The `SmoothQuantConfig` should be combined with `StaticQuantConfig` in a list because we still need to insert QDQ and apply pattern fusion after the smoothing process.
|
||||
|
||||
|
||||
### Determining the `alpha` through auto-tuning
|
||||
Users can search for the best `alpha` for the entire model. The tuning process looks for the optimal `alpha` value from a list of `alpha` values provided by the user.
|
||||
|
||||
Here is an example:
|
||||
|
||||
```python
|
||||
from neural_compressor.common.base_tuning import TuningConfig
from neural_compressor.tensorflow import StaticQuantConfig, SmoothQuantConfig, autotune
|
||||
|
||||
custom_tune_config = TuningConfig(config_set=[SmoothQuantConfig(alpha=[0.5, 0.6, 0.7]), StaticQuantConfig()])
|
||||
best_model = autotune(
|
||||
model="fp32_model",
|
||||
tune_config=custom_tune_config,
|
||||
eval_fn=eval_fn_wrapper,
|
||||
calib_dataloader=calib_dataloader,
|
||||
)
|
||||
```
|
||||
> Please note that it may take a considerable amount of time, as the tuning process applies each `alpha` to the entire model and uses the evaluation result on the entire dataset as the metric to determine the best `alpha`.
|
||||
|
||||
## Examples
|
||||
|
||||
Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant) on how to apply smooth quant to a TensorFlow model with `neural_compressor.tensorflow`.
|
@@ -0,0 +1,223 @@
|
||||
TensorFlow
|
||||
===============
|
||||
|
||||
|
||||
- [TensorFlow](#tensorflow)
|
||||
- [Introduction](#introduction)
|
||||
- [API for TensorFlow](#api-for-tensorflow)
|
||||
- [Support Matrix](#support-matrix)
|
||||
- [Quantization Scheme](#quantization-scheme)
|
||||
- [Quantization Approaches](#quantization-approaches)
|
||||
- [Post Training Static Quantization](#post-training-static-quantization)
|
||||
- [Smooth Quantization](#smooth-quantization)
|
||||
      - [Mixed Precision](#mixed-precision)
|
||||
- [Backend and Device](#backend-and-device)
|
||||
|
||||
## Introduction
|
||||
|
||||
`neural_compressor.tensorflow` provides an integrated API for applying quantization to various TensorFlow model formats, such as `pb`, `saved_model`, and `keras`. The range of supported models includes, but is not limited to, CV models, NLP models, and large language models.
|
||||
|
||||
In terms of ease of use, Neural Compressor is committed to providing flexible and scalable user interfaces. While `quantize_model` is designed to provide a fast and straightforward quantization experience, `autotune` offers an advanced option for reducing accuracy loss during quantization.
|
||||
|
||||
|
||||
## API for TensorFlow
|
||||
|
||||
Intel(R) Neural Compressor provides `quantize_model` and `autotune` as the main interfaces for supported algorithms on the TensorFlow framework.
|
||||
|
||||
|
||||
**quantize_model**
|
||||
|
||||
The design philosophy of the `quantize_model` interface is ease of use. With a minimal set of parameters, including `model`, `quant_config`, `calib_dataloader`, and `calib_iteration`, it offers a straightforward way to quantize a TensorFlow model in one shot.
|
||||
|
||||
```python
|
||||
def quantize_model(
|
||||
model: Union[str, tf.keras.Model, BaseModel],
|
||||
quant_config: Union[BaseConfig, list],
|
||||
calib_dataloader: Callable = None,
|
||||
calib_iteration: int = 100,
|
||||
calib_func: Callable = None,
|
||||
):
|
||||
```
|
||||
`model` should be a string pointing to the model's location, a Keras model object, or an INC TensorFlow model wrapper object.
|
||||
|
||||
`quant_config` is either a `StaticQuantConfig` object or a list containing a `SmoothQuantConfig` and a `StaticQuantConfig`; it indicates which algorithm should be used and what specific quantization rules should be applied.
|
||||
|
||||
`calib_dataloader` is used to load the data samples for the calibration phase. In most cases, it can be a subset of the evaluation dataset.
|
||||
|
||||
`calib_iteration` is used to decide how many iterations the calibration process will run.
|
||||
|
||||
`calib_func` is a substitute for `calib_dataloader` when the built-in calibration function of INC does not work for model inference.
|
||||
|
||||
|
||||
Here is a simple example of using `quantize_model` interface with a dummy calibration dataloader and the default `StaticQuantConfig`:
|
||||
```python
|
||||
from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
|
||||
from neural_compressor.tensorflow.utils import DummyDataset
|
||||
|
||||
dataset = DummyDataset(shape=(100, 32, 32, 3), label=True)
|
||||
calib_dataloader = MyDataLoader(dataset=dataset)
|
||||
quant_config = StaticQuantConfig()
|
||||
|
||||
qmodel = quantize_model("fp32_model.pb", quant_config, calib_dataloader)
|
||||
```
|
||||
**autotune**
|
||||
|
||||
The `autotune` interface, on the other hand, provides greater flexibility and power. It's particularly useful when accuracy is a critical factor. If the initial quantization doesn't meet the tolerance of accuracy loss, `autotune` will iteratively try quantization rules according to the `tune_config`.
|
||||
|
||||
Just like `quantize_model`, `autotune` requires `model`, `calib_dataloader`, and `calib_iteration`. In addition, `eval_fn` and `eval_args` are used to build the evaluation process.
|
||||
|
||||
|
||||
|
||||
```python
|
||||
def autotune(
|
||||
model: Union[str, tf.keras.Model, BaseModel],
|
||||
tune_config: TuningConfig,
|
||||
eval_fn: Callable,
|
||||
eval_args: Optional[Tuple[Any]] = None,
|
||||
calib_dataloader: Callable = None,
|
||||
calib_iteration: int = 100,
|
||||
calib_func: Callable = None,
|
||||
) -> Optional[BaseModel]:
|
||||
```
|
||||
`model` should be a string pointing to the model's location, a Keras model object, or an INC TensorFlow model wrapper object.
|
||||
|
||||
`tune_config` is the `TuningConfig` object which contains multiple quantization rules.
|
||||
|
||||
`eval_fn` is the evaluation function that measures the accuracy of a model.
|
||||
|
||||
`eval_args` is the supplemental arguments required by the defined evaluation function.
|
||||
|
||||
`calib_dataloader` is used to load the data samples for the calibration phase. In most cases, it can be a subset of the evaluation dataset.
|
||||
|
||||
`calib_iteration` is used to decide how many iterations the calibration process will run.
|
||||
|
||||
`calib_func` is a substitute for `calib_dataloader` when the built-in calibration function of INC does not work for model inference.
|
||||
|
||||
Here is a simple example of using `autotune` interface with different quantization rules defined by a list of `StaticQuantConfig`:
|
||||
```python
|
||||
from neural_compressor.common.base_tuning import TuningConfig
|
||||
from neural_compressor.tensorflow import StaticQuantConfig, autotune
|
||||
|
||||
calib_dataloader = MyDataloader(dataset=Dataset())
|
||||
custom_tune_config = TuningConfig(
|
||||
config_set=[
|
||||
StaticQuantConfig(weight_sym=True, act_sym=True),
|
||||
StaticQuantConfig(weight_sym=False, act_sym=False),
|
||||
]
|
||||
)
|
||||
best_model = autotune(
|
||||
model="baseline_model",
|
||||
tune_config=custom_tune_config,
|
||||
eval_fn=eval_acc_fn,
|
||||
calib_dataloader=calib_dataloader,
|
||||
)
|
||||
```
|
||||
|
||||
### Support Matrix
|
||||
|
||||
#### Quantization Scheme
|
||||
|
||||
| Framework | Backend Library | Symmetric Quantization | Asymmetric Quantization |
|
||||
| :--------------: | :---------------: | :---------------: | :---------------: |
|
||||
| TensorFlow | [oneDNN](https://github.com/oneapi-src/oneDNN) | Activation (int8/uint8), Weight (int8) | - |
|
||||
| Keras | [ITEX](https://github.com/intel/intel-extension-for-tensorflow) | Activation (int8/uint8), Weight (int8) | - |
|
||||
|
||||
|
||||
+ Symmetric Quantization
|
||||
  + int8: scale = 2 * max(abs(rmin), abs(rmax)) / (max(int8) - min(int8) - 1) (a worked example follows this list)
|
||||
+ uint8: scale = max(rmin, rmax) / (max(uint8) - min(uint8))
|
||||
|
||||
|
||||
+ oneDNN: [Lower Numerical Precision Deep Learning Inference and Training](https://software.intel.com/content/www/us/en/develop/articles/lower-numerical-precision-deep-learning-inference-and-training.html)
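As a quick numeric illustration of the symmetric int8 formula above (the `rmin`/`rmax` values are made-up calibration results, not from a real model):

```python
import numpy as np

rmin, rmax = -2.5, 6.0  # assumed min/max observed during calibration
scale = 2 * max(abs(rmin), abs(rmax)) / (127 - (-128) - 1)  # symmetric int8 scale
x = np.array([-2.5, 0.0, 3.1, 6.0])
q = np.clip(np.round(x / scale), -127, 127).astype(np.int8)  # quantized values
x_dequant = q * scale  # approximate reconstruction of the original values
```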
|
||||
|
||||
#### Quantization Approaches
|
||||
|
||||
The supported Quantization methods for TensorFlow and Keras are listed below:
|
||||
<table class="center">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Types</th>
|
||||
<th>Quantization</th>
|
||||
<th>Dataset Requirements</th>
|
||||
<th>Framework</th>
|
||||
<th>Backend</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td rowspan="2" align="center">Post-Training Static Quantization (PTQ)</td>
|
||||
<td rowspan="2" align="center">weights and activations</td>
|
||||
<td rowspan="2" align="center">calibration</td>
|
||||
<td align="center">Keras</td>
|
||||
<td align="center"><a href="https://github.com/intel/intel-extension-for-tensorflow">ITEX</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="center">TensorFlow</td>
|
||||
<td align="center"><a href="https://github.com/tensorflow/tensorflow">TensorFlow</a>/<a href="https://github.com/Intel-tensorflow/tensorflow">Intel TensorFlow</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td rowspan="1" align="center">Smooth Quantization(SQ)</td>
|
||||
<td rowspan="1" align="center">weights</td>
|
||||
<td rowspan="1" align="center">calibration</td>
|
||||
<td align="center">Tensorflow</td>
|
||||
<td align="center"><a href="https://github.com/tensorflow/tensorflow">TensorFlow</a>/<a href="https://github.com/Intel-tensorflow/tensorflow">Intel TensorFlow</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td rowspan="1" align="center">Mixed Precision(MP)</td>
|
||||
<td rowspan="1" align="center">weights and activations</td>
|
||||
<td rowspan="1" align="center">NA</td>
|
||||
<td align="center">Tensorflow</td>
|
||||
<td align="center"><a href="https://github.com/tensorflow/tensorflow">TensorFlow</a>/<a href="https://github.com/Intel-tensorflow/tensorflow">Intel TensorFlow</a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
||||
|
||||
##### Post Training Static Quantization
|
||||
|
||||
The min/max ranges of weights and activations are collected offline on a so-called `calibration` dataset. This dataset should represent the data distribution of the unseen inference data. The `calibration` process runs on the original fp32 model and dumps out all the tensor distributions for `Scale` and `ZeroPoint` calculations. Usually, preparing about 100 samples is enough for calibration.
|
||||
|
||||
Refer to the [PTQ Guide](./TF_Quant.md) for detailed information.
|
||||
|
||||
##### Smooth Quantization
|
||||
|
||||
Smooth Quantization (SQ) is an advanced quantization technique designed to optimize model performance while maintaining high accuracy. Unlike traditional quantization methods that can lead to significant accuracy loss, SQ takes a more refined approach by striking a balance between the scales of activations and weights.
|
||||
|
||||
Refer to the [SQ Guide](./TF_SQ.md) for detailed information.
|
||||
|
||||
##### Mixed Precision
|
||||
Mixed Precision (MP) is enabled together with Post Training Static Quantization. Once `BF16` is supported on the machine, the matched operators will be converted automatically.
|
||||
|
||||
|
||||
#### Backend and Device
|
||||
Intel(R) Neural Compressor supports TensorFlow on GPU with [ITEX-XPU](https://github.com/intel/intel-extension-for-tensorflow). The model will automatically run on the GPU if ITEX-XPU is detected as installed.
|
||||
|
||||
<table class="center">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Framework</th>
|
||||
<th>Backend</th>
|
||||
<th>Backend Library</th>
|
||||
<th>Backend Value</th>
|
||||
<th>Support Device(cpu as default)</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td rowspan="2" align="left">TensorFlow</td>
|
||||
<td align="left">TensorFlow</td>
|
||||
<td align="left">OneDNN</td>
|
||||
<td align="left">"default"</td>
|
||||
<td align="left">cpu</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td align="left">ITEX</td>
|
||||
<td align="left">OneDNN</td>
|
||||
<td align="left">"itex"</td>
|
||||
<td align="left">cpu | gpu</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<br>
|
||||
<br>
|
@@ -0,0 +1,90 @@
|
||||
AutoTune
|
||||
========================================
|
||||
|
||||
1. [Overview](#overview)
|
||||
2. [How it Works](#how-it-works)
|
||||
3. [Working with Autotune](#working-with-autotune) \
|
||||
3.1 [Working with PyTorch Model](#working-with-pytorch-model) \
|
||||
   3.2 [Working with TensorFlow Model](#working-with-tensorflow-model)
|
||||
|
||||
|
||||
## Overview
|
||||
|
||||
Intel® Neural Compressor aims to help users quickly deploy low-precision models by leveraging popular compression techniques, such as post-training quantization and weight-only quantization algorithms. Despite the variety of these algorithms, finding the appropriate configuration for a model can be difficult and time-consuming. To address this, we built the `autotune` module based on the [strategy](./tuning_strategies.md) in 2.x for accuracy-aware tuning, which identifies the best algorithm configuration for a model to achieve optimal performance under certain accuracy criteria. This module allows users to easily use predefined tuning recipes and customize the tuning space as needed.
|
||||
|
||||
## How it Works
|
||||
|
||||
The autotune module constructs the tuning space according to the pre-defined tuning set or the user's tuning set. It iterates over the tuning space, applies each configuration to the given float model, then records the evaluation result and compares it with the baseline. The tuning process stops when the exit policy is met.
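A minimal conceptual sketch of this loop is shown below; it is illustrative only, and all names (`apply_config`, `eval_fn`, the config list) are stand-ins rather than INC internals:

```python
def autotune_sketch(float_model, config_set, eval_fn, apply_config, tolerable_loss=0.01, max_trials=10):
    """Illustrative accuracy-aware tuning loop: stop on accuracy goal or trial budget."""
    baseline = eval_fn(float_model)
    for trial, config in enumerate(config_set, start=1):  # default sampler: sequential order
        candidate = apply_config(float_model, config)  # quantize with this configuration
        if baseline - eval_fn(candidate) <= tolerable_loss:  # accuracy goal met
            return candidate
        if trial >= max_trials:  # trial budget exhausted
            break
    return None
```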
|
||||
|
||||
|
||||
## Working with Autotune
|
||||
|
||||
The `autotune` API is used across all frameworks supported by INC. It accepts three primary arguments: `model`, `tune_config`, and `eval_fn`.
|
||||
|
||||
The `TuningConfig` class defines the tuning process, including the tuning space, order, and exit policy.
|
||||
|
||||
- Define the tuning space
|
||||
|
||||
User can define the tuning space by setting `config_set` with an algorithm configuration or a set of configurations.
|
||||
```python
|
||||
# imports for the configs used below (assuming the PyTorch entry point)
from neural_compressor.torch.quantization import GPTQConfig, RTNConfig, get_woq_tuning_config

# Use the default tuning space
|
||||
config_set = get_woq_tuning_config()
|
||||
|
||||
# Customize the tuning space with one algorithm configuration
|
||||
config_set = RTNConfig(use_sym=False, group_size=[32, 64])
|
||||
|
||||
# Customize the tuning space with two algorithm configurations
|
||||
config_set = [RTNConfig(use_sym=False, group_size=32), GPTQConfig(group_size=128, use_sym=False)]
|
||||
```
|
||||
|
||||
- Define the tuning order
|
||||
|
||||
The tuning order determines how the process traverses the tuning space and samples configurations. Users can customize it by configuring the `sampler`. Currently, we provide the `default_sampler`, which samples configurations sequentially, always in the same order.
|
||||
|
||||
- Define the exit policy
|
||||
|
||||
The exit policy includes two components: accuracy goal (`tolerable_loss`) and the allowed number of trials (`max_trials`). The tuning process will stop when either condition is met.
|
||||
|
||||
### Working with PyTorch Model
|
||||
The example below demonstrates how to autotune a PyTorch model on four `RTNConfig` configurations.
|
||||
|
||||
```python
|
||||
from neural_compressor.torch.quantization import RTNConfig, TuningConfig, autotune
|
||||
|
||||
|
||||
def eval_fn(model) -> float:
|
||||
return ...
|
||||
|
||||
|
||||
tune_config = TuningConfig(
|
||||
config_set=RTNConfig(use_sym=[False, True], group_size=[32, 128]),
|
||||
tolerable_loss=0.2,
|
||||
max_trials=10,
|
||||
)
|
||||
q_model = autotune(model, tune_config=tune_config, eval_fn=eval_fn)
|
||||
```
|
||||
|
||||
### Working with TensorFlow Model
|
||||
|
||||
The example below demonstrates how to autotune a TensorFlow model on two `StaticQuantConfig` configurations.
|
||||
|
||||
```python
|
||||
from neural_compressor.common.base_tuning import TuningConfig
from neural_compressor.tensorflow.quantization import StaticQuantConfig, autotune
|
||||
|
||||
calib_dataloader = MyDataloader(...)
|
||||
custom_tune_config = TuningConfig(
|
||||
config_set=[
|
||||
StaticQuantConfig(weight_sym=True, act_sym=True),
|
||||
StaticQuantConfig(weight_sym=False, act_sym=False),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def eval_fn(model) -> float:
|
||||
return ...
|
||||
|
||||
|
||||
best_model = autotune(
|
||||
model="baseline_model", tune_config=custom_tune_config, eval_fn=eval_fn, calib_dataloader=calib_dataloader
|
||||
)
|
||||
```
|
@@ -0,0 +1,61 @@
|
||||
Benchmark
|
||||
---
|
||||
|
||||
1. [Introduction](#introduction)
|
||||
|
||||
2. [Supported Matrix](#supported-matrix)
|
||||
|
||||
3. [Usage](#usage)
|
||||
|
||||
## Introduction
|
||||
|
||||
Intel Neural Compressor provides a command `incbench` to launch the Intel CPU performance benchmark.
|
||||
|
||||
To get peak performance on an Intel Xeon CPU, we should avoid crossing NUMA nodes within one instance.
|
||||
Therefore, by default, `incbench` will trigger 1 instance on the first NUMA node.
|
||||
|
||||
## Supported Matrix
|
||||
|
||||
| Platform | Status |
|
||||
|:---:|:---:|
|
||||
| Linux | ✔ |
|
||||
| Windows | ✔ |
|
||||
|
||||
## Usage
|
||||
|
||||
| Parameters | Default | Comments |
|
||||
|:----------------------:|:------------------------:|:-------------------------------------:|
|
||||
| num_instances | 1 | Number of instances |
|
||||
| num_cores_per_instance | None | Number of cores in each instance |
|
||||
| C, cores | 0-${num_cores_on_NUMA-1} | decides the visible core range |
|
||||
| cross_memory | False | whether to allocate memory cross NUMA |
|
||||
|
||||
> Note: cross_memory is set to True only when memory is insufficient.
|
||||
|
||||
### General Use Cases
|
||||
|
||||
1. `incbench main.py`: run 1 instance on NUMA:0.
|
||||
2. `incbench --num_i 2 main.py`: run 2 instances on NUMA:0.
|
||||
3. `incbench --num_c 2 main.py`: run multiple instances with 2 cores per instance on NUMA:0.
|
||||
4. `incbench -C 24-47 main.py`: run 1 instance on COREs:24-47.
|
||||
5. `incbench -C 24-47 --num_c 4 main.py`: run multiple instances with 4 cores per instance on COREs:24-47.
|
||||
|
||||
> Note:
|
||||
> - `num_i` works the same as `num_instances`
|
||||
> - `num_c` works the same as `num_cores_per_instance`
|
||||
|
||||
### Dump Throughput and Latency Summary
|
||||
|
||||
To merge benchmark results from multiple instances, `incbench` automatically checks the log files for "throughput" and "latency" messages matching the following patterns.
|
||||
|
||||
```python
|
||||
throughput_pattern = r"[T,t]hroughput:\s*([0-9]*\.?[0-9]+)\s*([a-zA-Z/]*)"
|
||||
latency_pattern = r"[L,l]atency:\s*([0-9]*\.?[0-9]+)\s*([a-zA-Z/]*)"
|
||||
```
|
||||
|
||||
#### Demo usage
|
||||
|
||||
```python
|
||||
print("Throughput: {:.3f} samples/sec".format(throughput))
|
||||
print("Latency: {:.3f} ms".format(latency * 10**3))
|
||||
```
|
@@ -0,0 +1,40 @@
|
||||
Quantization on Client
|
||||
==========================================
|
||||
|
||||
1. [Introduction](#introduction)
|
||||
2. [Get Started](#get-started)
|
||||
|
||||
## Introduction
|
||||
|
||||
For the `RTN` and `GPTQ` algorithms, we provide default algorithm configurations for different processor types (`client` and `server`). Generally, lightweight configurations are tailored specifically for client devices to enhance performance and efficiency.
|
||||
|
||||
|
||||
## Get Started
|
||||
|
||||
Here, we take the `RTN` algorithm as an example to demonstrate the usage on a client machine.
|
||||
|
||||
```python
|
||||
from neural_compressor.torch.quantization import get_default_rtn_config, convert, prepare
|
||||
from neural_compressor.torch import load_empty_model
|
||||
|
||||
model_state_dict_path = "/path/to/model/state/dict"
|
||||
float_model = load_empty_model(model_state_dict_path)
|
||||
quant_config = get_default_rtn_config()
|
||||
prepared_model = prepare(float_model, quant_config)
|
||||
quantized_model = convert(prepared_model)
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> By default, the appropriate configuration is determined based on hardware information, but users can explicitly specify `processor_type` as either `client` or `server` when calling `get_default_rtn_config`.
|
||||
|
||||
|
||||
For Windows machines, run the following command to utilize all available cores automatically:
|
||||
|
||||
```bash
|
||||
python main.py
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> For Linux systems, users need to configure the environment variables appropriately to achieve optimal performance. For example, set the `OMP_NUM_THREADS` explicitly. For processors with hybrid architecture (including both P-cores and E-cores), it is recommended to bind tasks to all P-cores using `taskset`.
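For example, assuming a machine with 16 P-cores numbered 0-15 (check the actual topology with `lscpu`), the environment could be configured as follows:

```bash
# Assumed core layout: 16 P-cores on CPUs 0-15; adjust to your machine.
export OMP_NUM_THREADS=16
taskset -c 0-15 python main.py
```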
|
||||
|
||||
RTN quantization is a quick process, finishing in tens of seconds and using several GB of RAM when working with 7B models, e.g., [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf). However, for higher accuracy, the GPTQ algorithm is recommended, but be prepared for a longer quantization time.
|
@@ -0,0 +1,16 @@
|
||||
Design
|
||||
=====
|
||||
|
||||
## Architecture
|
||||
|
||||
<a target="_blank" href="imgs/architecture.png">
|
||||
<img src="imgs/architecture.png" alt="Architecture">
|
||||
</a>
|
||||
|
||||
## Workflows
|
||||
|
||||
Intel® Neural Compressor provides two workflows: Quantization and Auto-tune.
|
||||
|
||||
<a target="_blank" href="imgs/workflow.png">
|
||||
<img src="imgs/workflow.png" alt="Workflow">
|
||||
</a>
|
@@ -0,0 +1,28 @@
|
||||
|
||||
### Version mapping between Intel Neural Compressor and Gaudi Software Stack ###
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Intel Neural Compressor</th>
|
||||
<th>Gaudi Software Stack</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>v3.0</td>
|
||||
<td>v1.17</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>v3.1</td>
|
||||
<td>v1.18</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>v3.2</td>
|
||||
<td>v1.19</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
BIN
uukssw/quote1/_ref/neural-compressor/docs/source/3x/imgs/architecture.png
(Stored with Git LFS)
Normal file
Binary file not shown.