{"id":28313,"date":"2025-04-19T23:41:35","date_gmt":"2025-04-19T15:41:35","guid":{"rendered":"https:\/\/www.wsisp.com\/helps\/28313.html"},"modified":"2025-04-19T23:41:35","modified_gmt":"2025-04-19T15:41:35","slug":"%e6%9c%8d%e5%8a%a1%e5%99%a8%e6%9c%ac%e5%9c%b0%e5%be%ae%e8%b0%83ministral-8b%e6%a8%a1%e5%9e%8b%e8%bf%9b%e8%a1%8cnli%e4%bb%bb%e5%8a%a1","status":"publish","type":"post","link":"https:\/\/www.wsisp.com\/helps\/28313.html","title":{"rendered":"\u670d\u52a1\u5668\u672c\u5730\u5fae\u8c03Ministral-8B\u6a21\u578b\u8fdb\u884cNLI\u4efb\u52a1"},"content":{"rendered":"<h2>\u4e3b\u8981\u6d41\u7a0b<\/h2>\n<h3>1. \u524d\u671f\u51c6\u5907<\/h3>\n<p>\u786c\u4ef6\u65b9\u9762\u9700\u8981\u4e00\u4e2a\u8f83\u9ad8\u914d\u7f6e\u7684GPU&#xff0c;\u6211\u8fd9\u91cc\u4f7f\u7528\u7684\u662fA100&#xff0c;80G\u663e\u5b58<\/p>\n<p>&#xff08;\u7531\u4e8e\u670d\u52a1\u5668\u8fde\u4e0d\u4e0ahuggingface&#xff0c;\u6211\u5c06\u6a21\u578b\u4e0b\u8f7d\u597d\u5e76\u4e0a\u4f20\u5230\u670d\u52a1\u5668\u8fdb\u884c\u672c\u5730\u8bad\u7ec3&#xff09;<\/p>\n<h4>\u6a21\u578b\u4e0b\u8f7d<\/h4>\n<p>\u6a21\u578b\u5730\u5740&#xff1a;https:\/\/huggingface.co\/mistralai\/Ministral-8B-Instruct-2410<\/p>\n<p>\u4e0b\u8f7d\u6a21\u578b\u4e4b\u524d\u9700\u8981\u5148\u7533\u8bf7\u4e00\u4e0b\u6743\u9650&#xff0c;\u901a\u8fc7\u4e4b\u540e&#xff0c;\u5728Files and versions\u4e2d\u4e0b\u8f7d\u6a21\u578b\u76f8\u5173\u6587\u4ef6&#xff0c;\u6211\u76f4\u63a5\u4e0b\u8f7d\u4e86\u6240\u6709\u7684\u6587\u4ef6\u3002<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"1200\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250419154131-6803c42b42edb.png\" width=\"1200\" 
\/><\/p>\n<p>\u4e0b\u8f7d\u597d\u6a21\u578b\u4e4b\u540e&#xff0c;\u53ef\u4ee5\u901a\u8fc7scp\u6307\u4ee4\u4e0a\u4f20\u5230\u670d\u52a1\u5668\u4e2d&#xff0c;\u6bd4\u5982\u6211\u4e0a\u4f20\u5230\u670d\u52a1\u5668\u4e0a\u7684\u4f4d\u7f6e\u4e3a&#xff1a;\/home\/user_m\/models\/Ministral-8B-Instruct-2410<\/p>\n<h4>\u6570\u636e\u96c6\u4e0b\u8f7d<\/h4>\n<p>\u4e0e\u6a21\u578b\u4e00\u6837&#xff0c;\u7531\u4e8e\u65e0\u6cd5\u8fde\u63a5\u5230huggingface&#xff0c;\u6211\u9009\u62e9\u5c06\u6570\u636e\u96c6\u4e0b\u8f7d\u5e76\u4e0a\u4f20\u5230\u670d\u52a1\u5668\u4e2d&#xff0c;\u6211\u8fd9\u91cc\u4f7f\u7528\u7684\u6570\u636e\u96c6\u4e3a\u00a0<span style=\"color:#000000\">e-SNLI: <\/span><span style=\"color:#215f9a\">https:\/\/<\/span><span style=\"color:#215f9a\">huggingface.co<\/span><span style=\"color:#215f9a\">\/datasets\/<\/span><span style=\"color:#215f9a\">esnli<\/span><span style=\"color:#215f9a\">\/<\/span><span style=\"color:#215f9a\">esnli<\/span><\/p>\n<p>\u540c\u6837&#xff0c;\u5c06Files and versions\u4e2d\u7684\u6587\u4ef6\u4e0b\u8f7d\u5e76\u901a\u8fc7scp\u6307\u4ee4\u4e0a\u4f20\u5230\u670d\u52a1\u5668&#xff0c;\u6211\u7684\u6570\u636e\u96c6\u5728\u670d\u52a1\u5668\u4e2d\u7684\u4f4d\u7f6e\u4e3a&#xff1a;\/home\/user_m\/DataSet\/esnil<\/p>\n<p>\u4e0b\u9762\u662f\u6570\u636e\u96c6\u4e2d\u4e00\u6761\u6570\u636e\u7684\u793a\u4f8b&#xff1a;<\/p>\n<p>{<br \/>\n    &#034;explanation_1&#034;: &#034;A woman must be present to smile.&#034;,<br \/>\n    &#034;explanation_2&#034;: &#034;A woman smiling implies that she is present.&#034;,<br \/>\n    &#034;explanation_3&#034;: &#034;A smiling woman is also present.&#034;,<br \/>\n    &#034;hypothesis&#034;: &#034;A woman is present.&#034;,<br \/>\n    &#034;label&#034;: 0,<br \/>\n    &#034;premise&#034;: &#034;A woman smiles at the child.&#034;<br \/>\n} 
<\/p>\n<p>\u6570\u636e\u96c6\u5185\u5bb9&#xff1a;<\/p>\n<li>\u524d\u63d0&#xff08;premise&#xff09;&#xff1a;\u4e00\u4e2a\u7ed9\u5b9a\u7684\u53e5\u5b50&#xff0c;\u4f5c\u4e3a\u63a8\u7406\u7684\u57fa\u7840\u3002<\/li>\n<li>\u5047\u8bbe&#xff08;hypothesis&#xff09;&#xff1a;\u9700\u8981\u6839\u636e\u524d\u63d0\u8fdb\u884c\u63a8\u65ad\u7684\u53e5\u5b50\u3002<\/li>\n<li>\u5173\u7cfb\u6807\u7b7e&#xff08;label&#xff09;&#xff1a;<img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"312\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250419154132-6803c42c8cff6.png\" width=\"961\" \/><\/li>\n<li>\u89e3\u91ca&#xff08;explanation&#xff09;&#xff1a;\n<ul>\n<li>\u5bf9\u4e0a\u8ff0\u63a8\u7406\u8fc7\u7a0b\u7684\u81ea\u7136\u8bed\u8a00\u89e3\u91ca&#xff0c;\u5e2e\u52a9\u6a21\u578b\u7406\u89e3\u63a8\u7406\u7684\u539f\u56e0\u3002<\/li>\n<li>\u89e3\u91ca\u7684\u5f62\u5f0f\u4e3a\u4eba\u5de5\u6807\u6ce8&#xff0c;\u4fdd\u8bc1\u8d28\u91cf\u548c\u6e05\u6670\u5ea6<\/li>\n<\/ul>\n<\/li>\n<h3>2. \u4f7f\u7528peft\u8fdb\u884c\u5fae\u8c03<\/h3>\n<p>peft\u5730\u5740&#xff1a;https:\/\/huggingface.co\/docs\/peft\/index<\/p>\n<p>\u5fae\u8c03\u4ee3\u7801\u5982\u4e0b&#xff1a; \u00a0<\/p>\n<p>from copy import deepcopy<br \/>\nfrom argparse import ArgumentParser<br \/>\nfrom datasets import load_from_disk,load_dataset<br \/>\nimport evaluate<br \/>\nimport numpy as np<br \/>\nfrom peft import get_peft_model, LoraConfig, TaskType<br \/>\nfrom transformers import AutoTokenizer, DataCollatorWithPadding<br \/>\nfrom transformers import AutoModelForSequenceClassification<br \/>\nfrom transformers import TrainingArguments, Trainer, TrainerCallback<br \/>\nimport torch<\/p>\n<p>POS_WEIGHT, NEG_WEIGHT, NATURAL_WEIGHT &#061; (1.1637114032405993, 0.8766697374481806, 1.0)<\/p>\n<p>def get_args():<br \/>\n    parser &#061; ArgumentParser(description&#061;&#034;Fine-tune an LLM model with PEFT&#034;)<br \/>\n    parser.add_argument(<br \/>\n        &#034;&#8211;data_path&#034;,<br \/>\n        type&#061;str,<br 
\/>\n        default&#061;&#039;\/home\/user_m\/DataSet\/esnil\/esnli.py&#039;,<br \/>\n        required&#061;True,<br \/>\n        help&#061;&#034;Path to Huggingface pre-processed dataset&#034;,<br \/>\n    )<br \/>\n    parser.add_argument(<br \/>\n        &#034;&#8211;output_path&#034;,<br \/>\n        type&#061;str,<br \/>\n        default&#061;&#039;\/home\/user_m\/output&#039;,<br \/>\n        required&#061;True,<br \/>\n        help&#061;&#034;Path to store the fine-tuned model&#034;,<br \/>\n    )<br \/>\n    parser.add_argument(<br \/>\n        &#034;&#8211;model_name&#034;,<br \/>\n        type&#061;str,<br \/>\n        default&#061;&#039;\/home\/user_m\/models\/Ministral-8B-Instruct-2410&#039;,<br \/>\n        required&#061;True,<br \/>\n        help&#061;&#034;Name of the pre-trained LLM to fine-tune&#034;,<br \/>\n    )<br \/>\n    parser.add_argument(<br \/>\n        &#034;&#8211;max_length&#034;,<br \/>\n        type&#061;int,<br \/>\n        default&#061;128,<br \/>\n        required&#061;False,<br \/>\n        help&#061;&#034;Maximum length of the input sequences&#034;,<br \/>\n    )<br \/>\n    parser.add_argument(<br \/>\n        &#034;&#8211;set_pad_id&#034;,<br \/>\n        action&#061;&#034;store_true&#034;,<br \/>\n        help&#061;&#034;Set the id for the padding token, needed by models such as Mistral-7B&#034;,<br \/>\n    )<br \/>\n    parser.add_argument(<br \/>\n        &#034;&#8211;lr&#034;, type&#061;float, default&#061;2e-4, help&#061;&#034;Learning rate for training&#034;<br \/>\n    )<br \/>\n    parser.add_argument(<br \/>\n        &#034;&#8211;train_batch_size&#034;, type&#061;int, default&#061;64, help&#061;&#034;Train batch size&#034;<br \/>\n    )<br \/>\n    parser.add_argument(<br \/>\n        &#034;&#8211;eval_batch_size&#034;, type&#061;int, default&#061;64, help&#061;&#034;Eval batch size&#034;<br \/>\n    )<br \/>\n    parser.add_argument(<br \/>\n        &#034;&#8211;num_epochs&#034;, type&#061;int, default&#061;2, 
help&#061;&#034;Number of epochs&#034;<br \/>\n    )<br \/>\n    parser.add_argument(<br \/>\n        &#034;&#8211;weight_decay&#034;, type&#061;float, default&#061;0.1, help&#061;&#034;Weight decay&#034;<br \/>\n    )<br \/>\n    parser.add_argument(<br \/>\n        &#034;&#8211;lora_rank&#034;, type&#061;int, default&#061;4, help&#061;&#034;Lora rank&#034;<br \/>\n    )<br \/>\n    parser.add_argument(<br \/>\n        &#034;&#8211;lora_alpha&#034;, type&#061;float, default&#061;0.0, help&#061;&#034;Lora alpha&#034;<br \/>\n    )<br \/>\n    parser.add_argument(<br \/>\n        &#034;&#8211;lora_dropout&#034;, type&#061;float, default&#061;0.2, help&#061;&#034;Lora dropout&#034;<br \/>\n    )<br \/>\n    parser.add_argument(<br \/>\n        &#034;&#8211;lora_bias&#034;,<br \/>\n        type&#061;str,<br \/>\n        default&#061;&#039;none&#039;,<br \/>\n        choices&#061;{&#034;lora_only&#034;, &#034;none&#034;, &#039;all&#039;},<br \/>\n        help&#061;&#034;Layers to add learnable bias&#034;<br \/>\n    )<\/p>\n<p>    arguments &#061; parser.parse_args()<br \/>\n    return arguments<\/p>\n<p>def compute_metrics(eval_pred):<br \/>\n    precision_metric &#061; evaluate.load(&#034;\/home\/user_m\/evaluate\/metrics\/precision&#034;)<br \/>\n    recall_metric &#061; evaluate.load(&#034;\/home\/user_m\/evaluate\/metrics\/recall&#034;)<br \/>\n    f1_metric &#061; evaluate.load(&#034;\/home\/user_m\/evaluate\/metrics\/f1&#034;)<br \/>\n    accuracy_metric &#061; evaluate.load(&#034;\/home\/user_m\/evaluate\/metrics\/accuracy&#034;)<\/p>\n<p>    logits, labels &#061; eval_pred<br \/>\n    predictions &#061; np.argmax(logits, axis&#061;-1)<br \/>\n    precision &#061; precision_metric.compute(predictions&#061;predictions, references&#061;labels,average&#061;&#039;macro&#039;)[&#034;precision&#034;]<br \/>\n    recall &#061; recall_metric.compute(predictions&#061;predictions, references&#061;labels,average&#061;&#039;macro&#039;)[&#034;recall&#034;]<br \/>\n    f1 
&#061; f1_metric.compute(predictions&#061;predictions, references&#061;labels,average&#061;&#039;macro&#039;)[&#034;f1&#034;]<br \/>\n    accuracy &#061; accuracy_metric.compute(predictions&#061;predictions, references&#061;labels)[&#034;accuracy&#034;]<br \/>\n    return {&#034;precision&#034;: precision, &#034;recall&#034;: recall, &#034;f1-score&#034;: f1, &#039;accuracy&#039;: accuracy}<\/p>\n<p>class CustomCallback(TrainerCallback):<br \/>\n    def __init__(self, trainer) -&gt; None:<br \/>\n        super().__init__()<br \/>\n        self._trainer &#061; trainer<\/p>\n<p>    def on_epoch_end(self, args, state, control, **kwargs):<br \/>\n        if control.should_evaluate:<br \/>\n            control_copy &#061; deepcopy(control)<br \/>\n            self._trainer.evaluate(eval_dataset&#061;self._trainer.train_dataset, metric_key_prefix&#061;&#034;train&#034;)<br \/>\n            return control_copy<\/p>\n<p>def get_dataset_and_collator(<br \/>\n    data_path,<br \/>\n    model_checkpoints,<br \/>\n    # add_prefix_space&#061;True,<br \/>\n    max_length&#061;128,<br \/>\n    truncation&#061;True,<br \/>\n    set_pad_id&#061;False<br \/>\n):<br \/>\n    &#034;&#034;&#034;<br \/>\n    Load the preprocessed HF dataset with train, valid and test objects<\/p>\n<p>    Paramters:<br \/>\n    &#8212;&#8212;&#8212;<br \/>\n    data_path: str<br \/>\n        Path to the pre-processed HuggingFace dataset<br \/>\n    model_checkpoints:<br \/>\n        Name of the pre-trained model to use for tokenization<br \/>\n    &#034;&#034;&#034;<br \/>\n    data &#061; load_dataset(data_path)<\/p>\n<p>    tokenizer &#061; AutoTokenizer.from_pretrained(<br \/>\n        model_checkpoints,<br \/>\n        # add_prefix_space&#061;add_prefix_space<br \/>\n    )<br \/>\n    print(&#034;Tokenizer loaded successfully&#034;)<br \/>\n    tokenizer.pad_token &#061; tokenizer.eos_token<\/p>\n<p>    def _preprocesscing_function(examples):<br \/>\n        return 
tokenizer(examples[&#034;premise&#034;],examples[&#034;hypothesis&#034;], truncation&#061;True, padding&#061;&#034;max_length&#034;, max_length&#061;max_length)<\/p>\n<p>    col_to_delete &#061; [&#039;explanation_1&#039;, &#039;explanation_2&#039;,&#039;explanation_3&#039;, &#039;premise&#039;,&#039;hypothesis&#039;]<br \/>\n    tokenized_datasets &#061; data.map(_preprocesscing_function, batched&#061;False)<br \/>\n    tokenized_datasets &#061; tokenized_datasets.remove_columns(col_to_delete)<br \/>\n    tokenized_datasets.set_format(&#034;torch&#034;)<\/p>\n<p>    padding_collator &#061; DataCollatorWithPadding(tokenizer&#061;tokenizer)<\/p>\n<p>    return tokenized_datasets, padding_collator<\/p>\n<p>def get_lora_model(model_checkpoints, num_labels&#061;3, rank&#061;4, alpha&#061;16, lora_dropout&#061;0.1, bias&#061;&#039;none&#039;):<br \/>\n    &#034;&#034;&#034;<br \/>\n    TODO<br \/>\n    &#034;&#034;&#034;<br \/>\n    model &#061;  AutoModelForSequenceClassification.from_pretrained(<br \/>\n            pretrained_model_name_or_path&#061;model_checkpoints,<br \/>\n            num_labels&#061;num_labels,<br \/>\n            device_map&#061;&#034;auto&#034;,<br \/>\n            offload_folder&#061;&#034;offload&#034;,<br \/>\n            trust_remote_code&#061;True,<br \/>\n        )<br \/>\n    model.config.pad_token_id &#061; model.config.eos_token_id<\/p>\n<p>    peft_config &#061; LoraConfig(<br \/>\n        task_type&#061;TaskType.SEQ_CLS, r&#061;rank, lora_alpha&#061;alpha, lora_dropout&#061;lora_dropout, bias&#061;bias,<br \/>\n        target_modules&#061;[<br \/>\n            &#034;q_proj&#034;,<br \/>\n            &#034;v_proj&#034;,<br \/>\n            # &#034;score&#034;<br \/>\n        ],<br \/>\n    )<\/p>\n<p>    model &#061; get_peft_model(model, peft_config)<\/p>\n<p>    print(model.print_trainable_parameters())<\/p>\n<p>    return model<\/p>\n<p>def get_weighted_trainer(pos_weight, neg_weight,neutral_weight&#061;1.0):<br \/>\n    class 
_WeightedBCELossTrainer(Trainer):<br \/>\n        def compute_loss(self, model, inputs, return_outputs&#061;False,num_items_in_batch&#061;None):<br \/>\n            labels &#061; inputs.pop(&#034;labels&#034;)<br \/>\n            # forward pass<br \/>\n            outputs &#061; model(**inputs)<br \/>\n            logits &#061; outputs.get(&#034;logits&#034;)<br \/>\n            # compute custom loss (suppose one has 3 labels with different weights)<br \/>\n            loss_fct &#061; torch.nn.CrossEntropyLoss(weight&#061;torch.tensor([neg_weight, pos_weight, neutral_weight], device&#061;labels.device, dtype&#061;logits.dtype))<br \/>\n            loss &#061; loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))<br \/>\n            return (loss, outputs) if return_outputs else loss<br \/>\n    return _WeightedBCELossTrainer<\/p>\n<p>def main(args):<br \/>\n    &#034;&#034;&#034;<br \/>\n    Training function<br \/>\n    &#034;&#034;&#034;<\/p>\n<p>    dataset, collator &#061;  get_dataset_and_collator(<br \/>\n        args.data_path,<br \/>\n        args.model_name,<br \/>\n        max_length&#061;args.max_length,<br \/>\n        set_pad_id&#061;args.set_pad_id,<br \/>\n        # add_prefix_space&#061;True,<br \/>\n        truncation&#061;True,<br \/>\n    )<\/p>\n<p>    training_args &#061; TrainingArguments(<br \/>\n        output_dir&#061;args.output_path,<br \/>\n        learning_rate&#061;args.lr,<br \/>\n        lr_scheduler_type&#061; &#034;cosine&#034;,<br \/>\n        warmup_ratio&#061; 0.1,<br \/>\n        per_device_train_batch_size&#061;args.train_batch_size,<br \/>\n        per_device_eval_batch_size&#061;args.eval_batch_size,<br \/>\n        num_train_epochs&#061;args.num_epochs,<br \/>\n        weight_decay&#061;args.weight_decay,<br \/>\n        evaluation_strategy&#061;&#034;epoch&#034;,<br \/>\n        save_strategy&#061;&#034;epoch&#034;,<br \/>\n        load_best_model_at_end&#061;True,<br \/>\n        
gradient_checkpointing&#061;True,<br \/>\n        fp16&#061;True,<br \/>\n        max_grad_norm&#061; 0.3,<br \/>\n    )<\/p>\n<p>    model &#061; get_lora_model(<br \/>\n        args.model_name,<br \/>\n        rank&#061;args.lora_rank,<br \/>\n        alpha&#061;args.lora_alpha,<br \/>\n        lora_dropout&#061;args.lora_dropout,<br \/>\n        bias&#061;args.lora_bias<br \/>\n    )<br \/>\n    if args.set_pad_id:<br \/>\n        model.config.pad_token_id &#061; model.config.eos_token_id<\/p>\n<p>    # move model to GPU device<br \/>\n    if model.device.type !&#061; &#039;cuda&#039;:<br \/>\n        model&#061;model.to(&#039;cuda&#039;)<\/p>\n<p>    weighted_trainer &#061; get_weighted_trainer(POS_WEIGHT, NEG_WEIGHT,NATURAL_WEIGHT)<\/p>\n<p>    trainer &#061; weighted_trainer(<br \/>\n        model&#061;model,<br \/>\n        args&#061;training_args,<br \/>\n        train_dataset&#061;dataset[&#039;train&#039;],<br \/>\n        eval_dataset&#061;dataset[&#034;validation&#034;],<br \/>\n        data_collator&#061;collator,<br \/>\n        compute_metrics&#061;compute_metrics<br \/>\n    )<br \/>\n    trainer.add_callback(CustomCallback(trainer))<br \/>\n    trainer.train()<\/p>\n<p>if __name__ &#061;&#061; &#034;__main__&#034;:<br \/>\n    args &#061; get_args()<br \/>\n    main(args) <\/p>\n<h3>3.\u6574\u4e2a\u8fc7\u7a0b\u4e2d\u9047\u5230\u7684\u95ee\u9898<\/h3>\n<h4>3.1\u00a0 raise ValueError(&#034;Cannot handle batch sizes &gt; 1 if no padding token is defined.&#034;) ValueError: Cannot handle batch sizes &gt; 1 if no padding token is defined.<\/h4>\n<p><img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"70\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250419154132-6803c42cbfda4.png\" width=\"1041\" \/><\/p>\n<p>\u539f\u56e0&#xff1a;<\/p>\n<p>\u5728\u52a0\u8f7dtokenizer\u65f6&#xff0c;\u4f7f\u7528\u4e86padding<\/p>\n<p>tokenizer.pad_token &#061; tokenizer.eos_token<br 
\/>\ntokenizer(examples[&#034;premise&#034;],examples[&#034;hypothesis&#034;], truncation&#061;True, padding&#061;&#034;max_length&#034;, max_length&#061;max_length) <\/p>\n<p>\u867d\u7136\u6307\u5b9a\u4e86tokenizer\u7684pad_token,\u4f46\u662f\u8fd9\u4e2a\u6a21\u578b\u672c\u8eab\u5e76\u6ca1\u6709\u9ed8\u8ba4\u7684pad_token_id&#xff0c;\u5bfc\u81f4\u6a21\u578b\u8ba4\u4e0d\u51fa\u8fd9\u4e2apad_token\u6240\u4ee5\u51fa\u73b0\u4e86\u62a5\u9519.<\/p>\n<\/p>\n<p>\u89e3\u51b3\u529e\u6cd5&#xff1a;<\/p>\n<p>\u5728\u52a0\u8f7d\u5b8c\u6a21\u578b\u4e4b\u540e&#xff0c;\u6307\u5b9apad_token_id:<\/p>\n<p>model &#061;  AutoModelForSequenceClassification.from_pretrained(<br \/>\n            pretrained_model_name_or_path&#061;model_checkpoints,<br \/>\n            num_labels&#061;num_labels,<br \/>\n            device_map&#061;&#034;auto&#034;,<br \/>\n            offload_folder&#061;&#034;offload&#034;,<br \/>\n            trust_remote_code&#061;True,<br \/>\n        )<br \/>\nmodel.config.pad_token_id &#061; model.config.eos_token_id <\/p>\n<p>\u8fd9\u6837\u6a21\u578b\u5c31\u77e5\u9053\u4f7f\u7528\u4e86eos_token\u4f5c\u4e3apad_token<\/p>\n<\/p>\n<h4>3.2 \u8bad\u7ec3\u540e\u8bc4\u4f30accuracy\u5f88\u4f4e<\/h4>\n<p>\u8bc4\u4f30\u5fae\u8c03\u4e4b\u540e\u6a21\u578b\u63a8\u7406\u7684accuracy\u65f6&#xff0c;\u53d1\u73b0\u6b63\u786e\u7387\u53ea\u67090.3\u591a\u4e00\u70b9&#xff0c;\u5bf9\u4e8e\u4e09\u5206\u7c7b\u95ee\u9898\u6765\u8bf4&#xff0c;\u8fd9\u4e2a\u51c6\u786e\u7387\u548c\u778e\u731c\u6ca1\u6709\u4ec0\u4e48\u533a\u522b\u3002\u3002\u3002\u6211\u5206\u6790\u4e86\u6211\u6700\u5f00\u59cb\u8fdb\u884c\u8bad\u7ec3\u7684\u4ee3\u7801&#xff0c;\u53d1\u73b0\u4e86\u5982\u4e0b\u95ee\u9898&#xff1a;<\/p>\n<p>\u6700\u521d\u6211\u8fdb\u884c\u8bad\u7ec3\u7684\u4ee3\u7801\u5982\u4e0b&#xff1a; \u00a0<\/p>\n<p>import torch<br \/>\nfrom transformers import AutoTokenizer, AutoModelForSequenceClassification<br \/>\nfrom datasets import load_dataset<\/p>\n<p>from peft import get_peft_model, LoraConfig<br \/>\nfrom 
transformers import Trainer, TrainingArguments<\/p>\n<p>import os<\/p>\n<p>model_path &#061; &#034;\/home\/user_m\/models\/Ministral-8B-Instruct-2410&#034;<br \/>\ntokenizer &#061; AutoTokenizer.from_pretrained(model_path)<\/p>\n<p>print(tokenizer.pad_token_id)<br \/>\nif tokenizer.pad_token_id is None:<br \/>\n    tokenizer.pad_token_id &#061; tokenizer.eos_token_id<br \/>\n# \u52a0\u8f7d ESNLI \u6570\u636e\u96c6<br \/>\ndataset &#061; load_dataset(&#034;\/home\/user_m\/DataSet\/esnil\/esnli.py&#034;,trust_remote_code&#061;True)<\/p>\n<p>def preprocess_function(examples):<br \/>\n    # print(examples[&#034;label&#034;])  # \u6253\u5370\u6807\u7b7e\u67e5\u770b<br \/>\n    return tokenizer(examples[&#034;premise&#034;], examples[&#034;hypothesis&#034;], padding&#061;&#039;max_length&#039; ,max_length &#061; 128, truncation&#061;True)<br \/>\n    # return tokenizer(examples[&#034;premise&#034;], examples[&#034;hypothesis&#034;])<br \/>\nencoded_dataset &#061; dataset.map(preprocess_function, batched&#061;True)<br \/>\nprint(encoded_dataset)<\/p>\n<p># \u4f7f\u7528Peft\u5fae\u8c03\u6a21\u578b<br \/>\n# \u8bbe\u7f6e LoRA \u914d\u7f6e<br \/>\nmodel &#061; AutoModelForSequenceClassification.from_pretrained(model_path, num_labels&#061;3)  # ESNLI \u662f\u4e09\u5206\u7c7b\u95ee\u9898<br \/>\nmodel.config.pad_token_id &#061; model.config.eos_token_id<br \/>\nlora_config &#061; LoraConfig(<br \/>\n    r&#061;8,  # LoRA\u7684\u4f4e\u79e9\u5927\u5c0f<br \/>\n    lora_alpha&#061;16,  # \u6807\u91cf<br \/>\n    lora_dropout&#061;0.1,<br \/>\n    target_modules&#061;[&#034;q_proj&#034;, &#034;k_proj&#034;, &#034;v_proj&#034;],  # LoRA\u9002\u7528\u4e8e\u7684\u6a21\u5757<br \/>\n)<\/p>\n<p># \u83b7\u53d6 PEFT \u6a21\u578b<br \/>\npeft_model &#061; get_peft_model(model, lora_config)<br \/>\npeft_model.print_trainable_parameters()<\/p>\n<p># \u8bbe\u7f6e\u8bad\u7ec3\u53c2\u6570<br \/>\ntraining_args &#061; TrainingArguments(<br \/>\n    
output_dir&#061;&#034;\/home\/user_m\/output\/round2&#034;,<br \/>\n    evaluation_strategy&#061;&#034;epoch&#034;,<br \/>\n    save_strategy&#061;&#034;epoch&#034;,<br \/>\n    learning_rate&#061;1e-6,<br \/>\n    per_device_train_batch_size&#061;32,<br \/>\n    per_device_eval_batch_size&#061;32,<br \/>\n    fp16&#061;True,<br \/>\n    num_train_epochs&#061;1,<br \/>\n    weight_decay&#061;0.01,<br \/>\n    logging_dir&#061;&#039;\/home\/user_m\/logs&#039;,<br \/>\n    logging_steps&#061;10,<br \/>\n)<\/p>\n<p># \u4f7f\u7528 Trainer \u8fdb\u884c\u5fae\u8c03<br \/>\ntrainer &#061; Trainer(<br \/>\n    model&#061;peft_model,<br \/>\n    args&#061;training_args,<br \/>\n    train_dataset&#061;encoded_dataset[&#034;train&#034;],<br \/>\n    eval_dataset&#061;encoded_dataset[&#034;validation&#034;],<br \/>\n    tokenizer &#061; tokenizer,<br \/>\n)<\/p>\n<p># \u5f00\u59cb\u8bad\u7ec3<br \/>\ntrainer.train()<\/p>\n<p># \u4f7f\u7528 Trainer \u8fdb\u884c\u8bc4\u4f30<br \/>\nresults &#061; trainer.evaluate()<\/p>\n<p>\u7136\u540e\u6bcf\u6b21\u52a0\u8f7d\u5b8c\u6a21\u578b\u4e4b\u540e\u4f1a\u51fa\u73b0\u4ee5\u4e0b\u4fe1\u606f&#xff1a;<\/p>\n<p>Some weights of the model checkpoint at \/home\/user_m\/models\/Ministral-8B-Instruct-2410 were not used when initializing MistralForSequenceClassification: [&#039;lm_head.weight&#039;]<\/p>\n<p>Some weights of MistralForSequenceClassification were not initialized from the model checkpoint at \/home\/user_m\/models\/Ministral-8B-Instruct-2410 and are newly initialized: [&#039;score.weight&#039;] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.<img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"144\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250419154132-6803c42ce92c9.png\" width=\"1200\" 
\/><\/p>\n<\/p>\n<p>\u95ee\u9898\u89e3\u51b3&#xff1a;<\/p>\n<p>\u9996\u5148\u5728\u4ee3\u7801\u5f00\u5934\u52a0\u5165\u5982\u4e0b\u5185\u5bb9&#xff1a;<\/p>\n<p>from transformers import logging<\/p>\n<p>logging.set_verbosity_info() <\/p>\n<p>\u8fd9\u884c\u6307\u4ee4\u53ef\u4ee5\u4f7f\u5f97\u7ec8\u7aef\u8f93\u51fa\u7684\u4fe1\u606f\u66f4\u4e3a\u8be6\u7ec6&#xff0c;\u518d\u6b21\u8fd0\u884c\u4ee3\u7801&#xff0c;\u52a0\u8f7d\u5b8c\u6a21\u578b\u4e4b\u540e\u539f\u672c\u7684\u4fe1\u606f\u53d8\u4e3a&#xff1a;<img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"244\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250419154133-6803c42d2d9a5.png\" width=\"1200\" \/><\/p>\n<p>\u5927\u81f4\u610f\u601d\u662f\u8bf4&#xff0c;\u52a0\u8f7d\u6a21\u578b\u65f6&#xff0c;\u6ca1\u6709\u4f7f\u7528lm_head.weight\u8fd9\u4e2a\u6743\u91cd&#xff0c;\u8fd9\u662f\u56e0\u4e3a\u6211\u4eec\u4f7f\u7528\u6a21\u578b\u8fdb\u884c\u4e86\u5176\u4ed6\u7684\u4efb\u52a1\u3002\u5e76\u4e14\u52a0\u8f7d\u6a21\u578b\u65f6score.weight\u8fd9\u4e2a\u6743\u91cd\u6ca1\u6709\u4ece\u6a21\u578b\u7684checkpoint\u4e2d\u627e\u5230&#xff0c;\u6240\u4ee5\u6a21\u578b\u5bf9\u8fd9\u4e00\u90e8\u5206\u7684\u6743\u91cd\u91cd\u65b0\u8fdb\u884c\u4e86\u968f\u673a\u521d\u59cb\u5316\u3002<\/p>\n<p>\u6211\u4eec\u52a0\u8f7d\u5b8c\u6a21\u578b\u4e4b\u540e\u67e5\u770b\u6a21\u578b\u7684config&#xff1a;<\/p>\n<p>print(&#034;model.config: &#034;,model.config) <\/p>\n<p>\u00a0\u53ef\u4ee5\u5f97\u5230\u5982\u4e0b\u8f93\u51fa\u4fe1\u606f&#xff1a;<\/p>\n<p>Model config MistralConfig {<br \/>\n  &#034;_name_or_path&#034;: &#034;\/home\/user_m\/models\/Ministral-8B-Instruct-2410&#034;,<br \/>\n  &#034;architectures&#034;: [<br \/>\n    &#034;MistralForCausalLM&#034;<br \/>\n  ],<br \/>\n  &#034;attention_dropout&#034;: 0.0,<br \/>\n  &#034;bos_token_id&#034;: 1,<br \/>\n  &#034;eos_token_id&#034;: 2,<br \/>\n  &#034;head_dim&#034;: 128,<br \/>\n  &#034;hidden_act&#034;: &#034;silu&#034;,<br \/>\n  &#034;hidden_size&#034;: 4096,<br \/>\n  
&#034;id2label&#034;: {<br \/>\n    &#034;0&#034;: &#034;LABEL_0&#034;,<br \/>\n    &#034;1&#034;: &#034;LABEL_1&#034;,<br \/>\n    &#034;2&#034;: &#034;LABEL_2&#034;<br \/>\n  },<br \/>\n  &#034;initializer_range&#034;: 0.02,<br \/>\n  &#034;intermediate_size&#034;: 12288,<br \/>\n  &#034;label2id&#034;: {<br \/>\n    &#034;LABEL_0&#034;: 0,<br \/>\n    &#034;LABEL_1&#034;: 1,<br \/>\n    &#034;LABEL_2&#034;: 2<br \/>\n  },<br \/>\n  &#034;max_position_embeddings&#034;: 32768,<br \/>\n  &#034;model_type&#034;: &#034;mistral&#034;,<br \/>\n  &#034;num_attention_heads&#034;: 32,<br \/>\n  &#034;num_hidden_layers&#034;: 36,<br \/>\n  &#034;num_key_value_heads&#034;: 8,<br \/>\n  &#034;rms_norm_eps&#034;: 1e-05,<br \/>\n  &#034;rope_theta&#034;: 100000000.0,<br \/>\n  &#034;sliding_window&#034;: 32768,<br \/>\n  &#034;tie_word_embeddings&#034;: false,<br \/>\n  &#034;torch_dtype&#034;: &#034;bfloat16&#034;,<br \/>\n  &#034;transformers_version&#034;: &#034;4.46.3&#034;,<br \/>\n  &#034;use_cache&#034;: true,<br \/>\n  &#034;vocab_size&#034;: 131072<br \/>\n} <\/p>\n<p>\u91cd\u70b9\u5173\u6ce8&#034;architectures&#034;: [&#034;MistralForCausalLM&#034;]&#xff0c;\u8bf4\u660e\u6a21\u578b\u672c\u8eab\u662f\u7528\u4e8e\u8fdb\u884cLM\u4efb\u52a1\u7684\u3002<\/p>\n<p>\u6211\u4eec\u518d\u4f7f\u7528AutoModelForCausalLM\u52a0\u8f7d\u5e76\u67e5\u770b\u6a21\u578b&#xff1a;<\/p>\n<p>model &#061; AutoModelForCausalLM.from_pretrained(model_name)<br \/>\nprint(model) <\/p>\n<p>\u8fd9\u6b21\u6211\u4eec\u4e0d\u518d\u5f97\u5230Some weights of the model checkpoint at \/home\/user_m\/models\/Ministral-8B-Instruct-2410 were not used when initializing MistralForSequenceClassification: [&#039;lm_head.weight&#039;]\u8fd9\u6837\u7684\u4fe1\u606f\u3002<\/p>\n<p>\u5e76\u4e14\u4f1a\u5f97\u5230\u4ee5\u4e0b\u8f93\u51fa\u4fe1\u606f&#xff1a;<img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"855\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250419154133-6803c42d8c84c.png\" 
width=\"1017\" \/><\/p>\n<p>\u53ef\u4ee5\u770b\u5230\u6a21\u578b\u7684\u6700\u540e\u4e00\u5c42\u662f\u4e00\u4e2alm_head\u7ebf\u6027\u5c42,\u8fd9\u4e00\u5c42\u5176\u5b9e\u5c31\u662f\u5bf9\u5e94\u6a21\u578b\u8fdb\u884cLM\u4efb\u52a1\u7528\u7684\u5934&#xff0c;\u5f53\u6211\u4eec\u4f7f\u7528AutoModelForCausalLM\u52a0\u8f7d\u6a21\u578b\u65f6&#xff0c;\u4e0e\u539f\u672c\u6a21\u578b\u4efb\u52a1\u4e00\u81f4&#xff0c;\u4e0d\u4f1a\u51fa\u73b0\u95ee\u9898\u3002<\/p>\n<p>\u4f46\u662f\u5f53\u6211\u4eec\u4f7f\u7528AutoModelForSequenceClassification\u52a0\u8f7d\u6a21\u578b&#xff0c;\u5e76\u8f93\u51fa\u6a21\u578b\u7ed3\u6784\u4fe1\u606f\u65f6&#xff1a;<\/p>\n<p>model &#061;  AutoModelForSequenceClassification.from_pretrained(<br \/>\n            pretrained_model_name_or_path&#061;model_checkpoints,<br \/>\n            num_labels&#061;num_labels,<br \/>\n            device_map&#061;&#034;auto&#034;,<br \/>\n            offload_folder&#061;&#034;offload&#034;,<br \/>\n            trust_remote_code&#061;True,<br \/>\n        )<br \/>\nprint(model) <\/p>\n<p>\u5c31\u4f1a\u51fa\u73b0Some weights of the model checkpoint at \/home\/user_m\/models\/Ministral-8B-Instruct-2410 were not used when initializing MistralForSequenceClassification: [&#039;lm_head.weight&#039;]\u4fe1\u606f&#xff0c;\u4e14\u6a21\u578b\u7ed3\u6784\u5982\u4e0b&#xff1a;<img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"862\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250419154133-6803c42ddc6dd.png\" width=\"1032\" \/><\/p>\n<p>\u6ce8\u610f&#xff0c;\u53ef\u4ee5\u770b\u5230\u6a21\u578b\u6700\u540e\u4e00\u5c42\u7531lm_head\u53d8\u6210\u4e86score&#xff0c;\u8fd9\u662f\u56e0\u4e3a\u4f7f\u7528AutoModelForSequenceClassification\u65f6&#xff0c;\u4f1a\u81ea\u52a8\u7ed9\u6a21\u578b\u9002\u914dscore\u5206\u7c7b\u5934\u7528\u4e8e\u5206\u7c7b\u4efb\u52a1&#xff0c;\u6240\u4ee5\u518d\u6765\u5206\u6790\u63d0\u793a\u4fe1\u606f&#xff1a;<\/p>\n<p>Some weights of the model checkpoint at 
\/home\/user_m\/models\/Ministral-8B-Instruct-2410 were not used when initializing MistralForSequenceClassification: [&#039;lm_head.weight&#039;]<\/p>\n<p>Some weights of MistralForSequenceClassification were not initialized from the model checkpoint at \/home\/user_m\/models\/Ministral-8B-Instruct-2410 and are newly initialized: [&#039;score.weight&#039;] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.<\/p>\n<p>\u7531\u4e8e\u6211\u4eec\u5c06\u6a21\u578b\u7528\u4e8eSequenceClassification\u4efb\u52a1&#xff0c;\u539f\u672c\u7684lm_head\u5c42\u88ab\u66ff\u6362\u6210\u4e86score\u5c42&#xff0c;\u8fd9\u6837\u4f7f\u5f97\u539f\u672ccheckpoint\u4e2dlm_head\u76f8\u5173\u6743\u91cd\u6ca1\u6cd5\u52a0\u8f7d&#xff0c;\u5e76\u4e14checkpoint\u4e2d\u539f\u672c\u5c31\u5e76\u4e0d\u5b58\u5728score\u6743\u91cd&#xff0c;\u6240\u4ee5\u624d\u4f1a\u51fa\u73b0\u4e0a\u8ff0\u4fe1\u606f\u63d0\u793a\u3002\u7531\u4e8e\u91cd\u65b0\u521d\u59cb\u5316\u4e86score\u5c42&#xff0c;\u6240\u4ee5\u5f53\u6a21\u578b\u8ba1\u7b97\u5230score\u5c42\u65f6&#xff0c;\u5229\u7528\u8fd9\u4e2a\u672a\u7ecf\u8bad\u7ec3\u7684\u5206\u7c7b\u5934\u505a\u6700\u540e\u4e00\u6b65\u8f93\u51fa&#xff0c;\u5f97\u5230\u7684\u7ed3\u679c\u7c7b\u4f3c\u4e8e\u968f\u673a\u9009\u62e9\u4e5f\u5e76\u4e0d\u5947\u602a\u3002\u3002\u3002<\/p>\n<p>\u95ee\u9898\u89e3\u51b3&#xff1a;<\/p>\n<p>\u5728\u5206\u6790\u4e86\u4ee3\u7801\u540e&#xff0c;\u6211\u8ba4\u4e3a\u95ee\u9898\u51fa\u5728peft_config\u8fd9\u91cc&#xff0c;\u8fd9\u91cc\u9700\u8981\u6307\u5b9a\u4efb\u52a1\u7c7b\u578btask_type&#061;TaskType.SEQ_CLS<\/p>\n<p>peft_config &#061; LoraConfig(<br \/>\n        task_type&#061;TaskType.SEQ_CLS, r&#061;rank, lora_alpha&#061;alpha, lora_dropout&#061;lora_dropout, bias&#061;bias,<br \/>\n        target_modules&#061;[<br \/>\n            &#034;q_proj&#034;,<br \/>\n            &#034;v_proj&#034;,<br \/>\n            # &#034;score&#034;<br \/>\n        ],<br \/>\n    ) 
<\/p>\n<p>\u8fd9\u6837\u8bad\u7ec3\u4e4b\u540e&#xff0c;\u518d\u6b21\u4f7f\u7528AutoModelForSequenceClassification\u52a0\u8f7d\u6a21\u578b\u65f6\u867d\u7136\u8fd8\u662f\u4f1a\u51fa\u73b0\u63d0\u793a\u4fe1\u606f&#xff0c;\u4f46\u662f\u8fd9\u4e2a\u4fe1\u606f\u662f\u7531\u4e8e\u52a0\u8f7dbaseline\u6a21\u578b\u51fa\u73b0\u7684&#xff0c;\u5728\u52a0\u8f7d\u5b8cbaseline\u4e4b\u540e&#xff0c;\u4f1a\u518d\u52a0\u8f7dpeft\u76f8\u5173\u6743\u91cd\u4fe1\u606f&#xff0c;\u800cscore\u6743\u91cd\u4fe1\u606f\u4f1a\u7531peft\u8fdb\u884c\u4fdd\u5b58&#xff0c;\u6240\u4ee5\u8fdb\u884c\u8bc4\u4f30\u65f6&#xff1a;<img loading=\"lazy\" decoding=\"async\" alt=\"\" height=\"202\" src=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250419154134-6803c42e5aa6e.png\" width=\"1200\" \/><\/p>\n<p>\u53d1\u73b0accuracy\u8fbe\u52300.86&#xff0c;\u662f\u4e00\u4e2a\u5408\u7406\u7684\u503c&#xff0c;\u8bf4\u660e\u5fae\u8c03\u6210\u529f&#xff0c;\u95ee\u9898\u89e3\u51b3&#xff01;<\/p>\n<p>\u603b\u7ed3\u6765\u8bf4&#xff0c;\u6a21\u578b\u5176\u5b9e\u662f\u5305\u542b\u4e24\u4e2a\u90e8\u5206&#xff0c;\u4e3b\u4f53\u6a21\u578b&#043;\u5904\u7406\u76f8\u5173\u4efb\u52a1\u7684\u4e0b\u6e38\u6a21\u578b&#xff08;\u6bd4\u5982lm_head\u3001score\u7b49&#xff09;&#xff0c;\u5c06\u672c\u6765\u7528\u4e8e\u4efb\u52a1A\u7684\u6a21\u578b\u5e94\u7528\u4e8e\u4efb\u52a1B\u65f6&#xff0c;\u5176\u5b9e\u662f\u4f1a\u5c06\u5904\u7406A\u7684\u4e0b\u6e38\u6a21\u578b\u66ff\u6362\u4e3a\u5904\u7406B\u7684\u4e0b\u6e38\u6a21\u578b&#xff0c;\u6240\u4ee5\u5982\u679c\u6211\u4eec\u5e0c\u671b\u6a21\u578b\u80fd\u6709\u826f\u597d\u7684\u8868\u73b0&#xff0c;\u5c31\u9700\u8981\u786e\u4fdd\u66ff\u6362\u540e\u7684\u4e0b\u6e38\u6a21\u578b\u4e5f\u662f\u7ecf\u8fc7\u826f\u597d\u8bad\u7ec3\u800c\u4e0d\u662f\u968f\u673a\u521d\u59cb\u5316\u7684&#xff0c;\u8fd9\u4e00\u6b65\u9700\u8981\u5728\u5fae\u8c03\u6a21\u578b\u6216\u8bad\u7ec3\u6a21\u578b\u65f6\u683c\u5916\u6ce8\u610f\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb899\u6b21\uff0
c\u70b9\u8d5e20\u6b21\uff0c\u6536\u85cf30\u6b21\u3002\u8fd9\u662f\u56e0\u4e3a\u4f7f\u7528AutoModelForSequenceClassification\u65f6\uff0c\u4f1a\u81ea\u52a8\u7ed9\u6a21\u578b\u9002\u914dscore\u5206\u7c7b\u5934\u7528\u4e8e\u5206\u7c7b\u4efb\u52a1\uff0c\u6240\u4ee5\u518d\u6765\u5206\u6790\u63d0\u793a\u4fe1\u606f\uff1a_ministral 8b<\/p>\n","protected":false},"author":2,"featured_media":28305,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[50,86],"topic":[],"class_list":["post-28313","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-server","tag-50","tag-86"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v20.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>\u670d\u52a1\u5668\u672c\u5730\u5fae\u8c03Ministral-8B\u6a21\u578b\u8fdb\u884cNLI\u4efb\u52a1 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.wsisp.com\/helps\/28313.html\" \/>\n<meta property=\"og:locale\" content=\"zh_CN\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"\u670d\u52a1\u5668\u672c\u5730\u5fae\u8c03Ministral-8B\u6a21\u578b\u8fdb\u884cNLI\u4efb\u52a1 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"og:description\" content=\"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb899\u6b21\uff0c\u70b9\u8d5e20\u6b21\uff0c\u6536\u85cf30\u6b21\u3002\u8fd9\u662f\u56e0\u4e3a\u4f7f\u7528AutoModelForSequenceClassification\u65f6\uff0c\u4f1a\u81ea\u52a8\u7ed9\u6a21\u578b\u9002\u914dscore\u5206\u7c7b\u5934\u7528\u4e8e\u5206\u7c7b\u4efb\u52a1\uff0c\u6240\u4ee5\u518d\u6765\u5206\u6790\u63d0\u793a\u4fe1\u606f\uff1a_ministral 8b\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.wsisp.com\/helps\/28313.html\" \/>\n<meta 
property=\"og:site_name\" content=\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\" \/>\n<meta property=\"article:published_time\" content=\"2025-04-19T15:41:35+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250419154131-6803c42b42edb.png\" \/>\n<meta name=\"author\" content=\"admin\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u4f5c\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"admin\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4\" \/>\n\t<meta name=\"twitter:data2\" content=\"10 \u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/28313.html\",\"url\":\"https:\/\/www.wsisp.com\/helps\/28313.html\",\"name\":\"\u670d\u52a1\u5668\u672c\u5730\u5fae\u8c03Ministral-8B\u6a21\u578b\u8fdb\u884cNLI\u4efb\u52a1 - 
\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"isPartOf\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\"},\"datePublished\":\"2025-04-19T15:41:35+00:00\",\"dateModified\":\"2025-04-19T15:41:35+00:00\",\"author\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\"},\"breadcrumb\":{\"@id\":\"https:\/\/www.wsisp.com\/helps\/28313.html#breadcrumb\"},\"inLanguage\":\"zh-Hans\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.wsisp.com\/helps\/28313.html\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/28313.html#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u9996\u9875\",\"item\":\"https:\/\/www.wsisp.com\/helps\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"\u670d\u52a1\u5668\u672c\u5730\u5fae\u8c03Ministral-8B\u6a21\u578b\u8fdb\u884cNLI\u4efb\u52a1\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#website\",\"url\":\"https:\/\/www.wsisp.com\/helps\/\",\"name\":\"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3\",\"description\":\"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.wsisp.com\/helps\/?s={search_term_string}\"},\"query-input\":\"required 
name=search_term_string\"}],\"inLanguage\":\"zh-Hans\"},{\"@type\":\"Person\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41\",\"name\":\"admin\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"zh-Hans\",\"@id\":\"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"contentUrl\":\"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery\",\"caption\":\"admin\"},\"sameAs\":[\"http:\/\/wp.wsisp.com\"],\"url\":\"https:\/\/www.wsisp.com\/helps\/author\/admin\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"\u670d\u52a1\u5668\u672c\u5730\u5fae\u8c03Ministral-8B\u6a21\u578b\u8fdb\u884cNLI\u4efb\u52a1 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.wsisp.com\/helps\/28313.html","og_locale":"zh_CN","og_type":"article","og_title":"\u670d\u52a1\u5668\u672c\u5730\u5fae\u8c03Ministral-8B\u6a21\u578b\u8fdb\u884cNLI\u4efb\u52a1 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","og_description":"\u6587\u7ae0\u6d4f\u89c8\u9605\u8bfb899\u6b21\uff0c\u70b9\u8d5e20\u6b21\uff0c\u6536\u85cf30\u6b21\u3002\u8fd9\u662f\u56e0\u4e3a\u4f7f\u7528AutoModelForSequenceClassification\u65f6\uff0c\u4f1a\u81ea\u52a8\u7ed9\u6a21\u578b\u9002\u914dscore\u5206\u7c7b\u5934\u7528\u4e8e\u5206\u7c7b\u4efb\u52a1\uff0c\u6240\u4ee5\u518d\u6765\u5206\u6790\u63d0\u793a\u4fe1\u606f\uff1a_ministral 
8b","og_url":"https:\/\/www.wsisp.com\/helps\/28313.html","og_site_name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","article_published_time":"2025-04-19T15:41:35+00:00","og_image":[{"url":"https:\/\/www.wsisp.com\/helps\/wp-content\/uploads\/2025\/04\/20250419154131-6803c42b42edb.png"}],"author":"admin","twitter_card":"summary_large_image","twitter_misc":{"\u4f5c\u8005":"admin","\u9884\u8ba1\u9605\u8bfb\u65f6\u95f4":"10 \u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.wsisp.com\/helps\/28313.html","url":"https:\/\/www.wsisp.com\/helps\/28313.html","name":"\u670d\u52a1\u5668\u672c\u5730\u5fae\u8c03Ministral-8B\u6a21\u578b\u8fdb\u884cNLI\u4efb\u52a1 - \u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","isPartOf":{"@id":"https:\/\/www.wsisp.com\/helps\/#website"},"datePublished":"2025-04-19T15:41:35+00:00","dateModified":"2025-04-19T15:41:35+00:00","author":{"@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41"},"breadcrumb":{"@id":"https:\/\/www.wsisp.com\/helps\/28313.html#breadcrumb"},"inLanguage":"zh-Hans","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.wsisp.com\/helps\/28313.html"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.wsisp.com\/helps\/28313.html#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u9996\u9875","item":"https:\/\/www.wsisp.com\/helps"},{"@type":"ListItem","position":2,"name":"\u670d\u52a1\u5668\u672c\u5730\u5fae\u8c03Ministral-8B\u6a21\u578b\u8fdb\u884cNLI\u4efb\u52a1"}]},{"@type":"WebSite","@id":"https:\/\/www.wsisp.com\/helps\/#website","url":"https:\/\/www.wsisp.com\/helps\/","name":"\u7f51\u7855\u4e92\u8054\u5e2e\u52a9\u4e2d\u5fc3","description":"\u9999\u6e2f\u670d\u52a1\u5668_\u9999\u6e2f\u4e91\u670d\u52a1\u5668\u8d44\u8baf_\u670d\u52a1\u5668\u5e2e\u52a9\u6587\u6863_\u670d\u52a1\u5668\u6559\u7a0b","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/
www.wsisp.com\/helps\/?s={search_term_string}"},"query-input":"required name=search_term_string"}],"inLanguage":"zh-Hans"},{"@type":"Person","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/358e386c577a3ab51c4493330a20ad41","name":"admin","image":{"@type":"ImageObject","inLanguage":"zh-Hans","@id":"https:\/\/www.wsisp.com\/helps\/#\/schema\/person\/image\/","url":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","contentUrl":"https:\/\/gravatar.wp-china-yes.net\/avatar\/?s=96&d=mystery","caption":"admin"},"sameAs":["http:\/\/wp.wsisp.com"],"url":"https:\/\/www.wsisp.com\/helps\/author\/admin"}]}},"_links":{"self":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/28313","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/comments?post=28313"}],"version-history":[{"count":0,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/posts\/28313\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media\/28305"}],"wp:attachment":[{"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/media?parent=28313"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/categories?post=28313"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/tags?post=28313"},{"taxonomy":"topic","embeddable":true,"href":"https:\/\/www.wsisp.com\/helps\/wp-json\/wp\/v2\/topic?post=28313"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}