louiscklaw
2025-01-31 22:10:02 +08:00
parent 97df42e0d5
commit 2627562070
2852 changed files with 748727 additions and 0 deletions


@@ -0,0 +1,416 @@
import os
import shutil
import unittest
import torch
import torch.utils.data as data
from packaging.version import Version
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import neural_compressor.adaptor.pytorch as nc_torch
from neural_compressor import mix_precision, set_workspace
from neural_compressor.config import MixedPrecisionConfig
from neural_compressor.utils.pytorch import load
from neural_compressor.utils.utility import LazyImport
torch_utils = LazyImport("neural_compressor.adaptor.torch_utils")
os.environ["WANDB_DISABLED"] = "true"
os.environ["DISABLE_MLFLOW_INTEGRATION"] = "true"
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
try:
    import intel_extension_for_pytorch as ipex

    TEST_IPEX = True
    IPEX_VERSION = Version(ipex.__version__)
except ImportError:
    TEST_IPEX = False
try:
    torch.randn(1).to("xpu")
    TEST_XPU = True
except Exception:  # no XPU device or driver available
    TEST_XPU = False
torch.manual_seed(9527)
assert TEST_IPEX, "Please install Intel Extension for PyTorch (ipex) to run these tests"
# get the installed torch version
PT_VERSION = nc_torch.get_torch_version().release
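
# Minimal dataloader stub: the calibration loop only needs a batch_size
# attribute plus the __iter__/__len__ protocol, so a single tokenized
# sentence pair is replayed ten times per pass.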
class DummyDataloader(data.DataLoader):
def __init__(self, device="cpu"):
self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
self.sequence_a = "intel-extension-for-transformers is based in SH"
self.sequence_b = "Where is intel-extension-for-transformers based? NYC or SH"
self.encoded_dict = self.tokenizer(self.sequence_a, self.sequence_b, return_tensors="pt")
self.batch_size = 1
self.device = device
def __len__(self):
return 10
    def __getitem__(self, index):
        """Return one tokenized sentence pair."""
        if index < 10:
            return self.encoded_dict.to(self.device)
        raise IndexError  # signal end of dataset to the iteration protocol
def __iter__(self):
for _ in range(10):
yield self.encoded_dict.to(self.device)
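
# Tiny Conv2d -> Linear model shared by the quantization and mixed-precision
# tests below.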
class M(torch.nn.Module):
def __init__(self):
super().__init__()
self.conv = torch.nn.Conv2d(3, 1, 1)
self.linear = torch.nn.Linear(224 * 224, 5)
def forward(self, x):
x = self.conv(x)
x = x.view(1, -1)
x = self.linear(x)
return x
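
# Standalone calibration callback: a single random forward pass lets the
# observers inserted at prepare time record activation ranges.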
def calib_func(model):
    # switch to evaluate mode
    model.eval()
    device = next(model.parameters()).device
    with torch.no_grad():
        dummy_input = torch.randn(1, 3, 224, 224).to(device)
        # compute output; the result itself is not needed for calibration
        model(dummy_input)
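
# Bare-bones iterable alternative to calib_func, for APIs that expect a
# calib_dataloader instead of a calibration function.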
class Dataloader:
def __init__(self, device="cpu") -> None:
self.batch_size = 1
self.device = device
def __iter__(self):
yield torch.randn(1, 3, 224, 224).to(self.device)
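
# Post-training static quantization through the IPEX backend; everything in
# this class is gated on PyTorch >= 1.12, the minimum version the IPEX
# adaptor's prepare/convert flow supports.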
@unittest.skipIf(
    PT_VERSION < Version("1.12.0").release, "Requires PyTorch 1.12 or higher"
)
class TestPytorchIPEX_1_12_Adaptor(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        set_workspace("./saved")

    @classmethod
    def tearDownClass(cls):
        shutil.rmtree("./saved", ignore_errors=True)
        shutil.rmtree("runs", ignore_errors=True)
def test_copy_prepared_model(self):
model = M()
if PT_VERSION < Version("2.1").release:
qconfig = ipex.quantization.default_static_qconfig
else:
qconfig = ipex.quantization.default_static_qconfig_mapping
prepared_model = ipex.quantization.prepare(
model, qconfig, example_inputs=torch.ones(1, 3, 224, 224), inplace=False
)
copy_model = torch_utils.util.auto_copy(prepared_model)
self.assertTrue(isinstance(copy_model, torch.nn.Module))
def test_new_API(self):
model = M()
from neural_compressor import PostTrainingQuantConfig, quantization
op_type_dict = {
"add": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}},
"linear": {
"weight": {
"dtype": ["int8"],
"scheme": ["sym"],
"granularity": ["per_channel"],
"algorithm": ["minmax"],
},
"activation": {
"dtype": ["int8"],
"scheme": ["sym"],
"granularity": ["per_tensor"],
"algorithm": ["kl"],
},
},
}
conf = PostTrainingQuantConfig(
backend="ipex",
op_type_dict=op_type_dict,
)
calib_dataloader = Dataloader()
q_model = quantization.fit(
model,
conf,
calib_dataloader=calib_dataloader,
)
q_model.save("./saved")
self.assertTrue(isinstance(q_model._model, torch.jit.ScriptModule))
def test_fallback_fused_op_type(self):
class M(torch.nn.Module):
def __init__(self):
super().__init__()
self.conv = torch.nn.Conv2d(3, 1, 1)
self.linear = torch.nn.Linear(224 * 224, 5)
def forward(self, a):
x = self.conv(a)
x += x
x = x.view(1, -1)
x = self.linear(x)
return x
model = M()
from neural_compressor import PostTrainingQuantConfig, quantization
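        # "Conv2d&add" names the fused conv+add pattern, so the fallback keeps
        # the whole fused op in fp32 rather than just one of its parts.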
op_type_dict = {
"Conv2d&add": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}},
}
conf = PostTrainingQuantConfig(
backend="ipex",
op_type_dict=op_type_dict,
)
calib_dataloader = Dataloader()
q_model = quantization.fit(
model,
conf,
calib_dataloader=calib_dataloader,
)
self.assertTrue(isinstance(q_model._model, torch.jit.ScriptModule))
def test_tune_minmax_obs(self):
class M(torch.nn.Module):
def __init__(self):
super().__init__()
self.linear = torch.nn.Linear(2, 2, False)
def forward(self, x):
x = self.linear(x)
x = x + x
return x
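        # float32 extremes stress the minmax observer: scale/zero-point
        # computation has to survive the widest possible activation range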
example_input = torch.tensor([[torch.finfo(torch.float32).max, -torch.finfo(torch.float32).max]])
model = M()
model.linear.weight = torch.nn.Parameter(torch.tensor([[0.0, 1.0], [1.0, 0.0]]))
def calib_func(model):
model(example_input)
from neural_compressor import PostTrainingQuantConfig, quantization
conf = PostTrainingQuantConfig(
backend="ipex",
example_inputs=example_input,
op_name_dict={".*": {"activation": {"algorithm": "minmax"}}},
recipes={"smooth_quant": True, "smooth_quant_args": {"alpha": 0.5}},
)
q_model = quantization.fit(model, conf, calib_func=calib_func)
self.assertTrue(isinstance(q_model._model, torch.jit.ScriptModule))
    @unittest.skipIf(
        IPEX_VERSION.release < Version("2.1.0").release,
        "Requires Intel Extension for PyTorch 2.1.0 or higher",
    )
def test_dict_inputs_for_model(self):
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
dummy_dataloader = DummyDataloader()
from neural_compressor import PostTrainingQuantConfig, quantization
conf = PostTrainingQuantConfig(
backend="ipex",
)
q_model = quantization.fit(
model,
conf,
calib_dataloader=dummy_dataloader,
)
q_model.save("./saved")
self.assertTrue(isinstance(q_model._model, torch.jit.ScriptModule))
    @unittest.skipIf(
        IPEX_VERSION.release < Version("2.1.0").release,
        "Requires Intel Extension for PyTorch 2.1.0 or higher",
    )
def test_dict_inputs_for_model_calib_func(self):
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
example_inputs = DummyDataloader()[0]
from neural_compressor import PostTrainingQuantConfig, quantization
def calib_func(p_model):
p_model(**example_inputs)
conf = PostTrainingQuantConfig(backend="ipex", example_inputs=example_inputs)
q_model = quantization.fit(
model,
conf,
calib_func=calib_func,
)
q_model.save("./saved")
self.assertTrue(isinstance(q_model._model, torch.jit.ScriptModule))
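
# XPU variants of the adaptor tests above: same configs, with the model, the
# calibration dataloader, and the config's device all moved to "xpu".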
@unittest.skipIf(
    PT_VERSION < Version("1.12.0").release or not TEST_XPU,
    "Requires PyTorch 1.12 or higher and an available XPU device",
)
class TestPytorchIPEX_XPU_1_12_Adaptor(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        set_workspace("./saved")

    @classmethod
    def tearDownClass(cls):
        shutil.rmtree("./saved", ignore_errors=True)
        shutil.rmtree("runs", ignore_errors=True)
def test_new_API(self):
model = M().to("xpu")
from neural_compressor import PostTrainingQuantConfig, quantization
op_type_dict = {
"add": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}},
"linear": {
"weight": {
"dtype": ["int8"],
"scheme": ["sym"],
"granularity": ["per_channel"],
"algorithm": ["minmax"],
},
"activation": {
"dtype": ["int8"],
"scheme": ["sym"],
"granularity": ["per_tensor"],
"algorithm": ["kl"],
},
},
}
conf = PostTrainingQuantConfig(
backend="ipex",
device="xpu",
op_type_dict=op_type_dict,
)
calib_dataloader = Dataloader(device="xpu")
q_model = quantization.fit(
model,
conf,
calib_dataloader=calib_dataloader,
)
q_model.save("./saved")
self.assertTrue(isinstance(q_model._model, torch.jit.ScriptModule))
def test_fallback_fused_op_type(self):
class M(torch.nn.Module):
def __init__(self):
super().__init__()
self.conv = torch.nn.Conv2d(3, 1, 1)
self.linear = torch.nn.Linear(224 * 224, 5)
def forward(self, a):
x = self.conv(a)
x += x
x = x.view(1, -1)
x = self.linear(x)
return x
model = M().to("xpu")
from neural_compressor import PostTrainingQuantConfig, quantization
op_type_dict = {
"Conv2d&add": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}},
}
conf = PostTrainingQuantConfig(
backend="ipex",
device="xpu",
op_type_dict=op_type_dict,
)
calib_dataloader = Dataloader(device="xpu")
q_model = quantization.fit(
model,
conf,
calib_dataloader=calib_dataloader,
)
self.assertTrue(isinstance(q_model._model, torch.jit.ScriptModule))
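
# BF16/FP16 conversion via mix_precision.fit; the FORCE_FP16/FORCE_BF16
# environment variables let the conversion proceed on hardware without
# native low-precision support.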
class TestMixedPrecision(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        os.environ["FORCE_FP16"] = "1"
        os.environ["FORCE_BF16"] = "1"
        cls.pt_model = M()

    @classmethod
    def tearDownClass(cls):
        os.environ.pop("FORCE_FP16", None)
        os.environ.pop("FORCE_BF16", None)
    @unittest.skipIf(
        IPEX_VERSION.release < Version("1.11.0").release,
        "Requires Intel Extension for PyTorch 1.11 or higher for mixed precision",
    )
def test_mixed_precision_with_eval_func_ipex(self):
torch = LazyImport("torch")
        def eval_func(model):  # renamed from eval to avoid shadowing the builtin
            return 0.5
conf = MixedPrecisionConfig(backend="ipex", example_inputs=torch.randn(1, 3, 224, 224))
output_model = mix_precision.fit(
self.pt_model,
conf,
            eval_func=eval_func,
)
self.assertTrue(isinstance(output_model._model, torch.jit.ScriptModule))
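
# Same mixed-precision flow on XPU, additionally gated on a reachable device.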
@unittest.skipIf(
    PT_VERSION < Version("1.12.0").release or not TEST_XPU,
    "Requires PyTorch 1.12 or higher and an available XPU device",
)
class TestMixedPrecisionXPU(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        os.environ["FORCE_FP16"] = "1"
        os.environ["FORCE_BF16"] = "1"
        cls.pt_model = M().to("xpu")

    @classmethod
    def tearDownClass(cls):
        os.environ.pop("FORCE_FP16", None)
        os.environ.pop("FORCE_BF16", None)
    @unittest.skipIf(
        IPEX_VERSION.release < Version("1.11.0").release,
        "Requires Intel Extension for PyTorch 1.11 or higher for mixed precision",
    )
def test_mixed_precision_with_eval_func_ipex(self):
torch = LazyImport("torch")
        def eval_func(model):  # renamed from eval to avoid shadowing the builtin
            return 0.5
conf = MixedPrecisionConfig(
backend="ipex",
device="xpu",
example_inputs=torch.randn(1, 3, 224, 224).to("xpu"),
)
output_model = mix_precision.fit(
self.pt_model,
conf,
            eval_func=eval_func,
)
self.assertTrue(isinstance(output_model._model, torch.jit.ScriptModule))
if __name__ == "__main__":
unittest.main()