This article explains how to use the PyTorch framework, together with the transformers package, to build a Chinese news sentiment classifier. The code is short and self-contained; follow it top to bottom.
Chinese News Sentiment Classification with BERT (PyTorch + transformers)
The project uses the PyTorch framework, the transformers package, and the pretrained Chinese BERT model bert-base-chinese.
File layout:
data/
    Train_DataSet.csv
    Train_DataSet_Label.csv
main.py
NewsData.py
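One note before the code: the listing was written against an early transformers release whose models return a plain (loss, logits) tuple from the forward pass. Recent releases return a ModelOutput object instead, which still supports tuple-style indexing, so the code below runs on either; checking the installed version is a one-liner:
import transformers
print(transformers.__version__)  # tuple indexing into model outputs works on old and new releases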
# main.py
from transformers import BertForSequenceClassification
import torch
import time
import argparse
import os
from NewsData import NewsData


def get_train_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=10, help='samples per batch')
    parser.add_argument('--nepoch', type=int, default=3, help='number of training epochs')
    parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
    parser.add_argument('--gpu', type=bool, default=True, help='whether to use the GPU')
    parser.add_argument('--num_workers', type=int, default=2, help='number of DataLoader worker threads')
    parser.add_argument('--num_labels', type=int, default=3, help='number of categories')
    parser.add_argument('--data_path', type=str, default='./data', help='data directory')
    opt = parser.parse_args()
    print(opt)
    return opt
def get_model(opt):
    # The class method from_pretrained() loads the pretrained weights;
    # num_labels sets the number of classes for the classification head.
    model = BertForSequenceClassification.from_pretrained('bert-base-chinese', num_labels=opt.num_labels)
    return model
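As a quick sanity check of that API (a sketch: the first call downloads the bert-base-chinese weights, and the example sentence and label are arbitrary):
# sketch: a single forward pass through the classification head
from transformers import BertTokenizer, BertForSequenceClassification
import torch

tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
model = BertForSequenceClassification.from_pretrained('bert-base-chinese', num_labels=3)
ids = torch.tensor([tokenizer.encode('这是一条测试新闻', add_special_tokens=False)])
outputs = model(ids, labels=torch.tensor([1]))
print(outputs[0])  # loss (scalar tensor)
print(outputs[1])  # logits, shape (1, 3)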
def get_data(opt):
    # NewsData subclasses PyTorch's Dataset (see NewsData.py)
    trainset = NewsData(opt.data_path, is_train=1)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers)
    testset = NewsData(opt.data_path, is_train=0)
    testloader = torch.utils.data.DataLoader(testset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers)
    return trainloader, testloader
def train(epoch, model, trainloader, testloader, optimizer, opt):
    print('\ntrain-Epoch: %d' % (epoch + 1))
    model.train()
    start_time = time.time()
    print_step = max(1, len(trainloader) // 10)  # log roughly 10 times per epoch
    for batch_idx, (sue, label, posi) in enumerate(trainloader):
        if opt.gpu:
            sue = sue.cuda()
            posi = posi.cuda()
            label = label.unsqueeze(1).cuda()
        optimizer.zero_grad()
        # forward pass: token ids, position ids, and labels
        outputs = model(sue, position_ids=posi, labels=label)
        loss, logits = outputs[0], outputs[1]
        loss.backward()
        optimizer.step()
        if batch_idx % print_step == 0:
            print("Epoch:%d [%d|%d] loss:%f" % (epoch + 1, batch_idx, len(trainloader), loss.mean()))
    print("time:%.3f" % (time.time() - start_time))
def test(epoch, model, trainloader, testloader, opt):
    print('\ntest-Epoch: %d' % (epoch + 1))
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for batch_idx, (sue, label, posi) in enumerate(testloader):
            if opt.gpu:
                sue = sue.cuda()
                posi = posi.cuda()
                labels = label.unsqueeze(1).cuda()
                label = label.cuda()
            else:
                labels = label.unsqueeze(1)
            outputs = model(sue, labels=labels)
            loss, logits = outputs[:2]
            _, predicted = torch.max(logits.data, 1)
            total += sue.size(0)
            correct += predicted.data.eq(label.data).cpu().sum()
    s = "Acc:%.3f" % ((1.0 * correct.numpy()) / total)
    print(s)
if __name__ == '__main__':
    opt = get_train_args()
    model = get_model(opt)
    trainloader, testloader = get_data(opt)
    if opt.gpu:
        model.cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr, momentum=0.9)
    if not os.path.exists('./model.pth'):
        for epoch in range(opt.nepoch):
            train(epoch, model, trainloader, testloader, optimizer, opt)
            test(epoch, model, trainloader, testloader, opt)
        torch.save(model.state_dict(), './model.pth')
    else:
        model.load_state_dict(torch.load('model.pth'))
        print('model exists, testing directly')
        test(0, model, trainloader, testloader, opt)
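With the data files in place under ./data, a run is just (the first command uses the defaults printed by get_train_args):
python main.py
python main.py --batch_size 10 --nepoch 3 --lr 0.001 --data_path ./data
One caveat: argparse's type=bool converts any non-empty string to True, so passing --gpu False still enables the GPU; change the default in get_train_args (or switch to action='store_true') to train on CPU.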
# NewsData.py
from transformers import BertTokenizer
import torch


class NewsData(torch.utils.data.Dataset):
    def __init__(self, root, is_train=1):
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
        self.data_num = 7346
        self.x_list = []
        self.y_list = []
        self.posi = []
        with open(root + '/Train_DataSet.csv', encoding='UTF-8') as f:
            for i in range(self.data_num + 1):
                # strip the newline and append neutral filler text so that
                # empty or very short lines still yield usable input
                line = f.readline()[:-1] + 'this is a neutral data'
                # second-to-last comma-separated field: title; last field: body
                data_one_str = line.split(',')[len(line.split(',')) - 2]
                data_two_str = line.split(',')[len(line.split(',')) - 1]
                # if the title is very short, append up to 200 characters of the body
                if len(data_one_str) < 6:
                    data_one_str = data_one_str + ',' + data_two_str[0:min(200, len(data_two_str))]
                if i == 0:
                    continue  # skip the CSV header row
                word_l = self.tokenizer.encode(data_one_str, add_special_tokens=False)
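The source listing breaks off at a length check on word_l. Judging from how main.py consumes the class — fixed-length batches unpacked as (sue, label, posi), labels drawn from Train_DataSet_Label.csv, and an is_train flag selecting a split — a plausible completion looks like the sketch below; the maximum length of 100, the pad id 0, and the 80/20 train/test split are assumptions, not the author's original values:
                # sketch of the truncated remainder (assumed values flagged below)
                if len(word_l) < 100:                            # assumed max length: 100
                    word_l = word_l + [0] * (100 - len(word_l))  # pad with [PAD] id 0
                else:
                    word_l = word_l[:100]
                self.x_list.append(torch.tensor(word_l))
                self.posi.append(torch.tensor(list(range(100))))

        # one integer label (0/1/2) per sample in the label file
        with open(root + '/Train_DataSet_Label.csv', encoding='UTF-8') as f:
            for i in range(self.data_num + 1):
                line = f.readline()
                if i == 0:
                    continue  # skip the header row
                self.y_list.append(torch.tensor(int(line.split(',')[-1])))

        # assumed split: first 80% of samples for training, the rest for testing
        cut = int(0.8 * len(self.x_list))
        if is_train == 1:
            self.x_list = self.x_list[:cut]
            self.y_list = self.y_list[:cut]
            self.posi = self.posi[:cut]
        else:
            self.x_list = self.x_list[cut:]
            self.y_list = self.y_list[cut:]
            self.posi = self.posi[cut:]

    def __getitem__(self, index):
        # each batch element is unpacked in main.py as (sue, label, posi)
        return self.x_list[index], self.y_list[index], self.posi[index]

    def __len__(self):
        return len(self.x_list)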