本年3月份开始,就接到通知, 根据《关于开展有关人群第二剂次脊髓灰质炎灭活疫苗补种工作的通知》国疾控卫免发〔2024〕1号文件要求,在2016年3月1日至2019年9月30日之间出生的儿童,凡无接种禁忌者,需补齐2剂次脊髓灰质炎灭活疫苗。由于我家一直是异地注射【在外漂打工,懂的都懂】,疫苗本上信息又特别有限【吐槽-六七年前的疫苗本缺陷太大了:无厂家,无备注是否口服,无备注是灭活还是减毒】,上周去注射被问及6年前的第一针是注射还是口服,瞬间被问住了,记得3年前幼儿园入学前的注射就已经被工作人员问过一次了,问脊髓灰质炎疫苗第二、三针是注射还是口服的,甲肝疫苗是活疫苗还是灭活疫苗。。。
经过网上各种搜索,通过疫苗本上写的批号到网上查询追溯,最后发现在【中国食品药品检定研究院】https://bio.nifdc.org.cn/pqf/search.do?formAction=pqfQkcx上可以查询,但是这个查询也太难用了,该网站需要厂家+疫苗名+批号三个条件查询,但我只知道批号,其它信息一概不知。。。
作为技术人员,一怒之下,写了个爬虫,把该网站近十年公布的疫苗批次信息全都抓到本地。。。
上菜:- <dependency>
- <groupId>cn.hutool</groupId>
- <artifactId>hutool-http</artifactId>
- <version>5.8.23</version>
- </dependency>
- <dependency>
- <groupId>org.jsoup</groupId>
- <artifactId>jsoup</artifactId>
- <version>1.17.2</version>
- </dependency>
- <dependency>
- <groupId>org.springframework.boot</groupId>
- <artifactId>spring-boot-starter-data-mongodb</artifactId>
- </dependency>
复制代码 - /**
- * 获取疫苗批次
- * @author zhaokk
- * @since 2024/5/26
- */
- public class GetVaccinBatch {
- public static String BASE_URL = "https://bio.nifdc.org.cn/pqf/";
- public static void main(String[] args) throws IOException {
- String[] listUrlArray = {
- //中国食品药品检定研究院
- "search.do?formAction=pqfGsByJG¶meter1=1",
- //北京市药品检验研究院
- "search.do?formAction=pqfGsByJG¶meter1=5b6ea8c91cf9013d011cfdfbda100041",
- //上海市食品药品检验研究院
- "search.do?formAction=pqfGsByJG¶meter1=4028813a1d225be5011d2265474b0004",
- //广东省药品检验所
- "search.do?formAction=pqfGsByJG¶meter1=4028813a1d225be5011d226a9159001c",
- //四川省药品检验研究院(四川省医疗器械检测中心)
- "search.do?formAction=pqfGsByJG¶meter1=4028813a1d225be5011d226ba310001e",
- //湖北省药品监督检验研究院
- "search.do?formAction=pqfGsByJG¶meter1=4028813a1d225be5011d22697942001a",
- //吉林省药品检验研究院
- "search.do?formAction=pqfGsByJG¶meter1=4028813a1d225be5011d226392100002",
- //甘肃省药品检验研究院
- "search.do?formAction=pqfGsByJG¶meter1=4028813a1d225be5011d226c637d0020",
- //重庆市食品药品检验检测研究院
- "search.do?formAction=pqfGsByJG¶meter1=20190917c001",
- //山东省食品药品检验研究院
- "search.do?formAction=pqfGsByJG¶meter1=20190924c001",
- //辽宁省药品检验检测院
- "search.do?formAction=pqfGsByJG¶meter1=20210315c001",
- //云南省食品药品监督检验研究院
- "search.do?formAction=pqfGsByJG¶meter1=20210926c001",
- //河北省药品医疗器械检验研究院
- "search.do?formAction=pqfGsByJG¶meter1=20211011c001",
- //浙江省食品药品检验研究院
- "search.do?formAction=pqfGsByJG¶meter1=20210210c002"
- };
- MongoDbUtils.connect("mongodb://127.0.0.1:27017", "vaccin-batch");
- for (String listUrl : listUrlArray) {
- //发送http请求
- Document document = Jsoup.connect(BASE_URL+listUrl).get();
- Elements aList = document.select("table tr td > a");
- for (int i = aList.size()-1; i >= 0; i--) {
- Element a = aList.get(i);
- String atext = a.text();
- String ahref = a.attr("href");
- String publishDateStr = atext.substring(atext.length()-11, atext.length()-1);
- System.out.println(atext + ":" + ahref);
- System.out.println("公布日期:" + publishDateStr);
- org.bson.Document saveLogDoc = new org.bson.Document();
- saveLogDoc.append("notice_list_url", BASE_URL+listUrl);
- saveLogDoc.append("notice_detail_url", BASE_URL+ahref);
- saveLogDoc.append("notice_title", atext);
- List<org.bson.Document> saveLogList = MongoDbUtils.findBy("vaccin-batch-savelog", saveLogDoc);
- if(!saveLogList.isEmpty()){
- System.out.println(BASE_URL+ahref + "【"+ atext + "】已存在,跳过");
- continue;
- }
- viewDetail(BASE_URL+ahref, atext);
- saveLogDoc.append("publish_date", publishDateStr);
- saveLogDoc.append("create_time", DateUtil.now());
- MongoDbUtils.insert("vaccin-batch-savelog", saveLogDoc);
- }
- }
- }
- public static void viewDetail(String noticeDetailUrl, String noticeTitle) throws IOException {
- // Document document = Jsoup.connect(noticeDetailUrl).get();
- Connection.Response resp = Jsoup.connect(noticeDetailUrl)
- .timeout(60000)
- .method(Connection.Method.GET)
- .maxBodySize(0)
- .followRedirects(false)
- .execute();
- String htmlStr = new String(resp.bodyAsBytes());
- Document document = Jsoup.parse(htmlStr);
- Elements theadList = document.select("table thead tr");
- if(theadList.isEmpty() || theadList.size() != 2){
- throw new RuntimeException("未解析到信息");
- }
- Elements theadCols = theadList.get(1).select("td");
- Elements tbodyList = document.select("table thead + tbody tr");
- if(tbodyList.isEmpty()){
- throw new RuntimeException("未解析到信息");
- }
- for (Element row : tbodyList) {
- Elements cols = row.select("td");
- if(cols.size() != theadCols.size()){
- // break;
- System.out.println(document);
- System.out.println(noticeDetailUrl);
- System.out.println(row);
- throw new RuntimeException("未解析到正确的信息");
- }
- org.bson.Document mongoDoc = new org.bson.Document();
- for (int i = 0; i < cols.size(); i++) {
- String key = FieldEnum.getName(theadCols.get(i).text());
- if(StrUtil.isBlank(key)){
- continue;
- }
- mongoDoc.append(key, cols.get(i).text());
- }
- mongoDoc.append("notice_title", noticeTitle);
- mongoDoc.append("notice_detail_url", noticeDetailUrl);
- //保存数据库
- MongoDbUtils.insert("vaccin-batch", mongoDoc);
- }
- }
- }
/**
 * Maps the Chinese column headers found in a notice table to the field names used for the
 * stored documents.
 *
 * <p>Two headers, {@code 生产企业} and {@code 上市许可持有人}, map to the same field name
 * {@code producer}.
 *
 * @author zhaokk
 * @since 2024/5/26
 */
public enum FieldEnum {
    PRODUCT_NAME("产品名称", "product_name"),
    SPEC("规格", "spec"),
    BATCH_NO("批号", "batch_no"),
    QUANTITY("签发量", "quantity"),
    VALID_DATE("有效期至", "valid_date"),
    PRODUCER("生产企业", "producer"),
    PRODUCER_ORG("上市许可持有人", "producer"),
    CHECK_NO("收检编号", "check_no"),
    CERT_NO("证书编号", "cert_no"),
    REPORT_NO("报告编号", "report_no"),
    SIGN_DATE("签发日期", "sign_date"),
    SIGN_REMARK("签发结论", "sign_remark"),
    SIGN_ORG("批签发机构", "sign_org")
    ;

    /** Column header text as it appears in the table. */
    private String remark;

    /** Field name used as the document key. */
    private String name;

    FieldEnum(String remark, String name) {
        this.remark = remark;
        this.name = name;
    }

    /**
     * Looks up the field name for a column header.
     *
     * @param remark column header text; may be {@code null}
     * @return the field name of the first constant whose header equals {@code remark}, or
     *         {@code null} when {@code remark} is {@code null} or matches no constant
     */
    public static String getName(String remark) {
        // A null header previously raised NullPointerException; treat it as "no mapping".
        if (remark == null) {
            return null;
        }
        for (FieldEnum value : FieldEnum.values()) {
            if (remark.equals(value.getRemark())) {
                return value.getName();
            }
        }
        return null;
    }

    /** @return the column header text of this constant */
    public String getRemark() {
        return remark;
    }

    /**
     * Replaces the header text of this constant. Enum constants are shared JVM-wide, so the
     * change is visible to every later {@link #getName(String)} lookup.
     *
     * @param remark new column header text
     */
    public void setRemark(String remark) {
        this.remark = remark;
    }

    /** @return the field name of this constant */
    public String getName() {
        return name;
    }

    /**
     * Replaces the field name of this constant. Enum constants are shared JVM-wide, so the
     * change is visible to every later {@link #getName(String)} lookup.
     *
     * @param name new field name
     */
    public void setName(String name) {
        this.name = name;
    }
}
复制代码 再搭配一道菜 MongoDB Util,不用跑什么Tomcat,运行main函数直接就是开干,最后通过Navicat等工具连上随意检索。

- import com.mongodb.BasicDBObject;
- import com.mongodb.MongoWriteException;
- import com.mongodb.client.*;
- import com.mongodb.client.model.Filters;
- import com.mongodb.client.result.DeleteResult;
- import com.mongodb.client.result.UpdateResult;
- import org.bson.Document;
- import org.bson.conversions.Bson;
- import org.bson.types.ObjectId;
- import java.util.ArrayList;
- import java.util.List;
- /**
- * MongoDb 操作类
- * @author zhaokui
- * 2018年1月31日
- */
- public class MongoDbUtils {
-
- private static MongoDatabase db;
- /**
- * 链接数据库
- *
- * @param uri
- * 主机名 + 端口号
- * @param databaseName
- * 数据库名称
- *
- */
- public static void connect(String uri, String databaseName) {
- MongoClient client = MongoClients.create(uri);
- db = client.getDatabase(databaseName);
- }
-
- public static MongoCollection<Document> getCollection(String collectionName){
- return db.getCollection(collectionName);
- }
-
-
- /**
- * 插入一个文档
- *
- * @param document
- * 文档
- */
- public static void insert(String collectionName, Document document) {
- getCollection(collectionName).insertOne(document);
- }
- /**
- * 插入一个文档
- *
- * @param document
- * 文档
- */
- public static void insertv2(String collectionName, Document document) throws Exception {
- try{
- getCollection(collectionName).insertOne(document);
- }catch(MongoWriteException e) {
- e.printStackTrace();
- }
- }
-
- /**
- * 查找对象 - 根据主键_id
- *
- * @param collectionName
- * @param id
- * @return
- */
- public static Document findById(String collectionName, String id) {
- ObjectId _idobj = null;
- try {
- _idobj = new ObjectId(id);
- } catch (Exception e) {
- return null;
- }
- Document myDoc = getCollection(collectionName).find(Filters.eq("_id", _idobj)).first();
- return myDoc;
- }
-
- /**
- * 查询所有文档
- *
- * @return 所有文档集合
- */
- public static List<Document> findAll(String collectionName) {
- List<Document> results = new ArrayList<Document>();
- FindIterable<Document> iterables = getCollection(collectionName).find();
- MongoCursor<Document> cursor = iterables.iterator();
- while (cursor.hasNext()) {
- results.add(cursor.next());
- }
-
- return results;
- }
-
- /**
- * 查询所有文档
- *
- * @return 所有文档集合
- */
- public static List<Document> findAll(String collectionName, Bson orderBy) {
- List<Document> results = new ArrayList<Document>();
- FindIterable<Document> iterables = getCollection(collectionName).find().sort(orderBy);
- MongoCursor<Document> cursor = iterables.iterator();
- while (cursor.hasNext()) {
- results.add(cursor.next());
- }
-
- return results;
- }
-
- /**
- * 根据条件查询
- *
- * @param filter
- * 查询条件 //注意Bson的几个实现类,BasicDBObject, BsonDocument,
- * BsonDocumentWrapper, CommandResult, Document, RawBsonDocument
- * @return 返回集合列表
- */
- public static List<Document> findBy(String collectionName, Bson filter) {
- List<Document> results = new ArrayList<Document>();
- FindIterable<Document> iterables = getCollection(collectionName).find(filter);
- MongoCursor<Document> cursor = iterables.iterator();
- while (cursor.hasNext()) {
- results.add(cursor.next());
- }
-
- return results;
- }
-
- /**
- * 根据条件查询 + 排序
- *
- * @param filter
- * 查询条件 //注意Bson的几个实现类,BasicDBObject, BsonDocument,
- * BsonDocumentWrapper, CommandResult, Document, RawBsonDocument
- * @return 返回集合列表
- */
- public static List<Document> findBy(String collectionName, Bson filter, Bson orderBy) {
- List<Document> results = new ArrayList<Document>();
- FindIterable<Document> iterables = getCollection(collectionName).find(filter).sort(orderBy);
- MongoCursor<Document> cursor = iterables.iterator();
- while (cursor.hasNext()) {
- results.add(cursor.next());
- }
-
- return results;
- }
-
- public static List<Document> findBy(String collectionName, Bson filter, Bson orderBy, int pageSize) {
- List<Document> results = new ArrayList<Document>();
- FindIterable<Document> iterables = getCollection(collectionName).find(filter).sort(orderBy).limit(pageSize);
- MongoCursor<Document> cursor = iterables.iterator();
- while (cursor.hasNext()) {
- results.add(cursor.next());
- }
- return results;
- }
-
- /** 统计数 */
- public static long getCount(String collectionName, Bson filter) {
- return getCollection(collectionName).countDocuments(filter);
- }
-
- /** 分页查询 */
- public static List<Document> findByPage(String collectionName, Bson filter, int pageNo, int pageSize) {
- List<Document> results = new ArrayList<Document>();
- Bson orderBy = new BasicDBObject("_id", -1);
- MongoCursor<Document> cursor = getCollection(collectionName).find(filter).sort(orderBy).skip((pageNo - 1) * pageSize).limit(pageSize).iterator();
- while (cursor.hasNext()) {
- results.add(cursor.next());
- }
- return results;
- }
-
- /** 分页查询+排序 */
- public static List<Document> findByPage(String collectionName, Bson filter, Bson orderBy, int pageNo, int pageSize) {
- List<Document> results = new ArrayList<Document>();
- MongoCursor<Document> cursor = getCollection(collectionName).find(filter).sort(orderBy).skip((pageNo - 1) * pageSize).limit(pageSize).iterator();
- while (cursor.hasNext()) {
- results.add(cursor.next());
- }
- return results;
- }
-
- /**
- * 更新查询到的第一个
- *
- * @param filter
- * 查询条件
- * @param update
- * 更新文档
- * @return 更新结果
- */
- public static UpdateResult updateOne(String collectionName, Bson filter, Bson update) {
- UpdateResult result = getCollection(collectionName).updateOne(filter, update);
-
- return result;
- }
-
- /**
- * 更新查询到的所有的文档
- *
- * @param filter
- * 查询条件
- * @param update
- * 更新文档
- * @return 更新结果
- */
- public static UpdateResult updateMany(String collectionName, Bson filter, Bson update) {
- UpdateResult result = getCollection(collectionName).updateMany(filter, update);
-
- return result;
- }
-
- /**
- * FIXME
- *
- * @param collectionName
- * @param id
- * @param newdoc
- * @return
- */
- public static Document updateById(String collectionName, String id, Document newdoc) {
- ObjectId _idobj = null;
- try {
- _idobj = new ObjectId(id);
- } catch (Exception e) {
- return null;
- }
- Bson filter = Filters.eq("_id", _idobj);
- // coll.replaceOne(filter, newdoc); // 完全替代
- getCollection(collectionName).updateOne(filter, new Document("$set", newdoc));
- return newdoc;
- }
-
- /**
- * 更新一个文档, 结果是replacement是新文档,老文档完全被替换
- *
- * @param filter
- * 查询条件
- * @param replacement
- * 跟新文档
- */
- public static void replace(String collectionName, Bson filter, Document replacement) {
- getCollection(collectionName).replaceOne(filter, replacement);
- }
-
- /**
- * 根据条件删除一个文档
- *
- * @param filter
- * 查询条件
- */
- public static void deleteOne(String collectionName, Bson filter) {
- getCollection(collectionName).deleteOne(filter);
- }
-
- /**
- * 根据条件删除多个文档
- *
- * @param filter
- * 查询条件
- */
- public static void deleteMany(String collectionName, Bson filter) {
- getCollection(collectionName).deleteMany(filter);
- }
-
- /**
- * 通过ID删除
- *
- * @param collectionName
- * @param id
- * @return
- */
- public static long deleteById(String collectionName, String id) {
- long count = 0;
- ObjectId _id = null;
- try {
- _id = new ObjectId(id);
- } catch (Exception e) {
- return 0;
- }
- Bson filter = Filters.eq("_id", _id);
- DeleteResult deleteResult = getCollection(collectionName).deleteOne(filter);
- count = deleteResult.getDeletedCount();
- return count;
- }
-
- }
复制代码
翻译
搜索
复制
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息请访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。 |