@article {2724, title = {Application of Dimension Reduction to CAT Item Selection Under the Bifactor Model}, journal = {Applied Psychological Measurement}, volume = {43}, number = {6}, year = {2019}, pages = {419-434}, abstract = {Multidimensional computerized adaptive testing (MCAT) based on the bifactor model is suitable for tests with multidimensional bifactor measurement structures. Several item selection methods that proved to be more advantageous than the maximum Fisher information method are not practical for bifactor MCAT due to time-consuming computations resulting from high dimensionality. To make them applicable in bifactor MCAT, dimension reduction is applied to four item selection methods: posterior-weighted Fisher D-optimality (PDO) and three non-Fisher information-based methods{\textemdash}posterior expected Kullback{\textendash}Leibler information (PKL), continuous entropy (CE), and mutual information (MI). These methods are compared with the Bayesian D-optimality (BDO) method in terms of estimation precision. When both the general and group factors are the measurement objectives, BDO, PDO, CE, and MI perform equally well and better than PKL. When the group factors represent nuisance dimensions, MI and CE perform best in estimating the general factor, followed by BDO, PDO, and PKL. How the bifactor pattern and test length affect estimation accuracy is also discussed.}, doi = {10.1177/0146621618813086}, url = {https://doi.org/10.1177/0146621618813086}, author = {Xiuzhen Mao and Jiahui Zhang and Tao Xin} } @article {2692, title = {A Hybrid Strategy to Construct Multistage Adaptive Tests}, journal = {Applied Psychological Measurement}, volume = {42}, number = {8}, year = {2018}, pages = {630-643}, abstract = {How to effectively construct multistage adaptive test (MST) panels is a topic that has spurred recent advances. The most commonly used approaches for MST assembly follow one of two strategies: bottom-up or top-down. The bottom-up approach splits the whole test into several modules; each module is built first, and the modules are then assembled into the whole test. The top-down approach proceeds in the opposite direction. Both methods have their pros and cons, and sometimes neither is convenient for practitioners. This study provides an innovative hybrid strategy that builds optimal MST panels efficiently in most cases. Empirical data and results obtained with this strategy are provided.}, doi = {10.1177/0146621618762739}, url = {https://doi.org/10.1177/0146621618762739}, author = {Xinhui Xiong} } @article {2624, title = {Latent Class Analysis of Recurrent Events in Problem-Solving Items}, journal = {Applied Psychological Measurement}, volume = {42}, number = {6}, year = {2018}, pages = {478-498}, abstract = {Computer-based assessment of complex problem-solving abilities is becoming increasingly popular. In such an assessment, the entire problem-solving process of an examinee is recorded, providing detailed information about the individual, such as behavioral patterns, speed, and learning trajectory. The problem-solving processes are recorded in a computer log file, which is a time-stamped record of events related to task completion. As opposed to cross-sectional response data from traditional tests, process data in log files are massive and irregularly structured, calling for effective exploratory data analysis methods.
Motivated by a specific complex problem-solving item {\textquotedblleft}Climate Control{\textquotedblright} in the 2012 Programme for International Student Assessment, the authors propose a latent class analysis approach to analyzing the events that occur in the problem-solving processes. The exploratory latent class analysis yields meaningful latent classes. Simulation studies are conducted to evaluate the proposed approach.}, doi = {10.1177/0146621617748325}, url = {https://doi.org/10.1177/0146621617748325}, author = {Haochen Xu and Guanhua Fang and Yunxiao Chen and Jingchen Liu and Zhiliang Ying} } @conference {2646, title = {Item Pool Design and Evaluation}, booktitle = {IACAT 2017 Conference}, year = {2017}, month = {08/2017}, publisher = {Niigata Seiryo University}, organization = {Niigata Seiryo University}, address = {Niigata, Japan}, abstract = {

Early work on CAT tended to use existing sets of items that came from fixed-length test forms. These sets of items had been selected to meet requirements quite different from those of a CAT, such as making decisions or covering a content domain. However, some early work suggested having items distributed equally over the range of proficiency of interest, or concentrated at a decision point. Other work showed that proficiency estimates were biased when an item pool was too easy or too hard. These early findings eventually led to work on item pool design and, more recently, on item pool evaluation. This presentation gives a brief overview of these topics to provide context for the following presentations in this symposium.

}, keywords = {CAT, Item Pool Design}, url = {https://drive.google.com/open?id=1ZAsqm1yNZlliqxEHcyyqQ_vOSu20xxZs}, author = {Mark D Reckase and Wei He and Jing-Ru Xu and Xuechun Zhou} } @article {2457, title = {Investigation of Response Changes in the GRE Revised General Test}, journal = {Educational and Psychological Measurement}, volume = {75}, number = {6}, year = {2015}, pages = {1002-1020}, abstract = {Research on examinees{\textquoteright} response changes on multiple-choice tests over the past 80 years has yielded some consistent findings, including that most examinees make score gains by changing answers. This study expands the research on response changes by focusing on a high-stakes admissions test{\textemdash}the Verbal Reasoning and Quantitative Reasoning measures of the GRE revised General Test. We analyzed data from 8,538 examinees on the Quantitative section and 9,140 on the Verbal section who took the GRE revised General Test in 12 countries. The analyses yielded findings consistent with prior research. In addition, as examinees{\textquoteright} ability increases, the benefit of response changing increases. The study has significant implications for both testing agencies and test takers: computerized adaptive tests often do not allow test takers to review and revise their answers, and findings from this study confirm the benefit of such a feature.}, doi = {10.1177/0013164415573988}, url = {http://epm.sagepub.com/content/75/6/1002.abstract}, author = {Liu, Ou Lydia and Bridgeman, Brent and Gu, Lixiong and Xu, Jun and Kong, Nan} } @article {2303, title = {The Application of the Monte Carlo Approach to Cognitive Diagnostic Computerized Adaptive Testing With Content Constraints}, journal = {Applied Psychological Measurement}, volume = {37}, number = {6}, year = {2013}, pages = {482-496}, abstract = {

The Monte Carlo approach which has previously been implemented in traditional computerized adaptive testing (CAT) is applied here to cognitive diagnostic CAT to test the ability of this approach to address multiple content constraints. The performance of the Monte Carlo approach is compared with the performance of the modified maximum global discrimination index (MMGDI) method on simulations in which the only content constraint is on the number of items that measure each attribute. The results of the two simulation experiments show that (a) the Monte Carlo method fulfills all the test requirements and produces satisfactory measurement precision and item exposure results and (b) the Monte Carlo method outperforms the MMGDI method when the Monte Carlo method applies either the posterior-weighted Kullback{\textendash}Leibler algorithm or the hybrid Kullback{\textendash}Leibler information as the item selection index. Overall, the recovery rate of the knowledge states, the distribution of the item exposure, and the utilization rate of the item bank are improved when the Monte Carlo method is used.

}, doi = {10.1177/0146621613486015}, url = {http://apm.sagepub.com/content/37/6/482.abstract}, author = {Mao, Xiuzhen and Xin, Tao} } @article {2313, title = {Speededness and Adaptive Testing}, journal = {Journal of Educational and Behavioral Statistics}, volume = {38}, number = {4}, year = {2013}, pages = {418-438}, abstract = {

Two simple constraints on the item parameters in a response-time model are proposed to control the speededness of an adaptive test. As the constraints are additive, they can easily be included in the constraint set for a shadow-test approach (STA) to adaptive testing. Alternatively, a simple heuristic is presented to control speededness in plain adaptive testing without any constraints. Both types of control are easy to implement and do not require any real-time parameter estimation during the test other than the regular update of the test taker{\textquoteright}s ability estimate. Evaluation of the two approaches using simulated adaptive testing showed that the STA was especially effective. It guaranteed testing times that differed by less than 10 seconds from a reference test across a variety of conditions.

}, doi = {10.3102/1076998612466143}, url = {http://jeb.sagepub.com/cgi/content/abstract/38/4/418}, author = {van der Linden, Wim J. and Xiong, Xinhui} } @article {2132, title = {Computerized adaptive testing under nonparametric IRT models}, journal = {Psychometrika}, volume = {71}, year = {2006}, pages = {121-137}, author = {Xu, X. and Douglas, J.} } @article {181, title = {Optimal and nonoptimal computer-based test designs for making pass-fail decisions}, journal = {Applied Measurement in Education}, volume = {19}, number = {3}, year = {2006}, pages = {221-239}, publisher = {Lawrence Erlbaum: US}, abstract = {Now that many credentialing exams are being routinely administered by computer, new computer-based test designs, along with item response theory models, are being aggressively researched to identify specific designs that can increase the decision consistency and accuracy of pass-fail decisions. The purpose of this study was to investigate the impact of optimal and nonoptimal multistage test (MST) designs, linear parallel-form test designs (LPFT), and computer adaptive test (CAT) designs on the decision consistency and accuracy of pass-fail decisions. Realistic testing situations matching those of one of the large credentialing agencies were simulated to increase the generalizability of the findings. The conclusions were clear: (a) With the LPFTs, matching test information functions (TIFs) to the mean of the proficiency distribution produced slightly better results than matching them to the passing score; (b) all of the test designs worked better than test construction using random selection of items, subject to content constraints only; (c) CAT performed better than the other test designs; and (d) if matching a TIF to the passing score, the MST design produced slightly better results than the LPFT design. If an argument for the MST design is to be made, it can be made on the basis of slight improvements over the LPFT design and better expected item bank utilization, candidate preference, and the potential for improved diagnostic feedback, compared with the feedback that is possible with fixed linear test forms. (PsycINFO Database Record (c) 2007 APA, all rights reserved)}, keywords = {adaptive test, credentialing exams, Decision Making, Educational Measurement, multistage tests, optimal computer-based test designs, test form}, isbn = {0895-7347 (Print); 1532-4818 (Electronic)}, author = {Hambleton, R. K. and Xing, D.} } @booklet {1393, title = {Computer-based test designs with optimal and non-optimal tests for making pass-fail decisions}, year = {2004}, address = {Research Report, University of Massachusetts, Amherst, MA}, author = {Hambleton, R. K. and Xing, D.} } @article {2171, title = {Impact of Test Design, Item Quality, and Item Bank Size on the Psychometric Properties of Computer-Based Credentialing Examinations}, journal = {Educational and Psychological Measurement}, volume = {64}, number = {1}, year = {2004}, pages = {5-21}, abstract = {

Computer-based testing by credentialing agencies has become common; however, selecting a test design is difficult because several good ones are available{\textemdash}parallel forms, computer adaptive (CAT), and multistage (MST). In this study, three computer-based test designs were investigated under some common examination conditions. Item bank size and item quality had a practically significant impact on decision consistency and accuracy. Even in nearly ideal situations, the choice of test design was not a factor in the results. Two conclusions follow from the findings: (a) More time and resources should be committed to expanding the size and quality of item banks, and (b) designs that individualize an exam administration, such as MST and CAT, may not be helpful when the primary purpose of the examination is to make pass-fail decisions and conditions are present for using parallel forms with a target information function that can be centered on the passing score.

}, doi = {10.1177/0013164403258393}, url = {http://epm.sagepub.com/content/64/1/5.abstract}, author = {Xing, Dehui and Hambleton, Ronald K.} } @article {2170, title = {Psychometric and Psychological Effects of Item Selection and Review on Computerized Testing}, journal = {Educational and Psychological Measurement}, volume = {63}, number = {5}, year = {2003}, pages = {791-808}, abstract = {

Psychometric properties of computerized testing, together with anxiety and comfort of examinees, are investigated in relation to item selection routine and the opportunity for response review. Two different hypotheses involving examinee anxiety were used to design test properties: perceived control and perceived performance. The study involved three types of administration of a computerized English test for Spanish speakers (adaptive, easy adaptive, and fixed) and four review conditions (no review, review at end, review by blocks of 5 items, and review item-by-item). These were applied to a sample of 557 first-year psychology undergraduate students to examine main and interaction effects of test type and review on psychometric and psychological variables. Statistically significant effects were found in test precision among the different types of test. Response review improved ability estimates and increased testing time. No psychological effects on anxiety were found. Examinees in all review conditions rated the possibility of review as more important than did those who were not allowed to review. These results concur with previous findings on examinees{\textquoteright} preference for item review and raise some issues that should be addressed in the field of tests with item review.

}, doi = {10.1177/0013164403251282}, url = {http://epm.sagepub.com/content/63/5/791.abstract}, author = {Revuelta, Javier and Xim{\'e}nez, M. Carmen and Olea, Julio} } @conference {1275, title = {A simulation study to compare CAT strategies for cognitive diagnosis}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2003}, note = {{PDF file, 250 KB}}, address = {Chicago IL}, author = {Xu, X. and Chang, Hua-Hua and Douglas, J.} } @article {379, title = {Small sample estimation in dichotomous item response models: Effect of priors based on judgmental information on the accuracy of item parameter estimates}, journal = {Applied Psychological Measurement}, volume = {27}, number = {1}, year = {2003}, note = {Sage Publications, US}, pages = {27-51}, abstract = {Large item banks with properly calibrated test items are essential for ensuring the validity of computer-based tests. At the same time, item calibrations with small samples are desirable to minimize the amount of pretesting and limit item exposure. Bayesian estimation procedures show considerable promise with small examinee samples. The purposes of the study were (a) to examine how prior information for Bayesian item parameter estimation can be specified and (b) to investigate the relationship between sample size and the specification of prior information on the accuracy of item parameter estimates. The results of the simulation study were clear: Estimation of item response theory (IRT) model item parameters can be improved considerably. Improvements in the one-parameter model were modest; considerable improvements with the two- and three-parameter models were observed. Both the study of different forms of priors and ways to improve the judgmental data used in forming the priors appear to be promising directions for future research. }, author = {Swaminathan, H. and Hambleton, R. K. and Sireci, S. G. and Xing, D. and Rizavi, S. M.} } @conference {1274, title = {Impact of test design, item quality and item bank size on the psychometric properties of computer-based credentialing exams}, booktitle = {Paper presented at the meeting of National Council on Measurement in Education}, year = {2002}, note = {PDF file, 500 K}, address = {New Orleans}, author = {Xing, D. and Hambleton, R. K.} } @booklet {1582, title = {Impact of several computer-based testing variables on the psychometric properties of credentialing examinations (Laboratory of Psychometric and Evaluative Research Report No 393)}, year = {2001}, address = {Amherst, MA: University of Massachusetts, School of Education.}, author = {Xing, D. and Hambleton, R. 
K.} } @conference {1124, title = {Classification accuracy and test security for a computerized adaptive mastery test calibrated with different IRT models}, booktitle = {Paper presented at the annual meeting of the National Council on Measurement in Education}, year = {2000}, address = {New Orleans LA}, author = {Robin, F. and Xing, D. and Scrams, D. and Potenza, M.} } @article {653, title = {Psychometric and psychological effects of review on computerized fixed and adaptive tests}, journal = {Psicol{\'o}gica}, volume = {21}, year = {2000}, pages = {157-173}, author = {Olea, J. and Revuelta, J. and Xim{\'e}nez, M. C. and Abad, F. J.} } @book {1724, title = {Computerized adaptive testing strategies: Golden section search, dichotomous search, and Z-score strategies (Doctoral dissertation, Iowa State University, 1990)}, year = {1993}, address = {Dissertation Abstracts International, 54-03B, 1720}, author = {Xiao, B.} } @conference {1272, title = {Dichotomous search strategies for computerized adaptive testing}, booktitle = {Paper presented at the annual meeting of the American Educational Research Association.}, year = {1990}, author = {Xiao, B.} } @conference {1271, title = {Golden section search strategies for computerized adaptive testing}, booktitle = {Paper presented at the Fifth International Objective Measurement Workshop}, year = {1989}, note = {$\#$XI89-01}, address = {Berkeley CA}, author = {Xiao, B.} }